/*
 * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

// FIXME XXX
#define NVOC_GPU_INSTANCE_SUBSCRIPTION_H_PRIVATE_ACCESS_ALLOWED

#include "core/prelude.h"


// FIXME XXX
#define NVOC_KERNEL_GRAPHICS_MANAGER_H_PRIVATE_ACCESS_ALLOWED

#include <class/cl0002.h>
#include <class/cl0005.h>
#include <class/cl003e.h> // NV01_MEMORY_SYSTEM
#include <class/cl0040.h> // NV01_MEMORY_LOCAL_USER
#include <class/cl0080.h>
#include <class/cl503b.h>
#include <class/cl50a0.h> // NV50_MEMORY_VIRTUAL
#include <class/cl90e6.h>
#include <class/cl90f1.h>
#include <class/cla06c.h> // KEPLER_CHANNEL_GROUP_A
#include <class/cla06f.h>
#include <class/clb069.h>
#include <class/clb069sw.h>
#include <class/clb06f.h>
#include <class/clb0b5.h>
#include <class/clb0b5sw.h>
#include <class/clb0c0.h>
#include <class/clb1c0.h>
#include <class/clc06f.h>
#include <class/clc076.h>
#include <class/clc0b5.h>
#include <class/clc0c0.h>
#include <class/clc1b5.h>
#include <class/clc1c0.h>
#include <class/clc361.h>
#include <class/clc365.h>
#include <class/clc369.h>
#include <class/clc36f.h>
#include <class/clc3b5.h>
#include <class/clc3c0.h>
#include <class/clc46f.h>
#include <class/clc4c0.h>
#include <class/clc56f.h>
#include <class/clc572.h> // PHYSICAL_CHANNEL_GPFIFO
#include <class/clc574.h> // UVM_CHANNEL_RETAINER
#include <class/clc5b5.h>
#include <class/clc5c0.h>
#include <class/clc637.h>
#include <class/clc6b5.h>
#include <class/clc6c0.h>
#include <class/clc7b5.h>
#include <class/clc7c0.h>
#include <class/clcb33.h> // NV_CONFIDENTIAL_COMPUTE
#include <class/clc661.h> // HOPPER_USERMODE_A
#include <class/clc8b5.h> // HOPPER_DMA_COPY_A
#include <class/clcbc0.h> // HOPPER_COMPUTE_A
#include <class/clcba2.h> // HOPPER_SEC2_WORK_LAUNCH_A
#include <alloc/alloc_access_counter_buffer.h>

#include <ctrl/ctrl0000/ctrl0000gpu.h>
#include <ctrl/ctrl0000/ctrl0000system.h>
#include <ctrl/ctrl0080/ctrl0080fifo.h>
#include <ctrl/ctrl0080/ctrl0080gpu.h>
#include <ctrl/ctrl2080/ctrl2080fb.h>
#include <ctrl/ctrl2080/ctrl2080fifo.h>
#include <ctrl/ctrl2080/ctrl2080gpu.h>
#include <ctrl/ctrl2080/ctrl2080gr.h>
#include <ctrl/ctrl90e6.h>
#include <ctrl/ctrl90f1.h>
#include <ctrl/ctrla06f.h>
#include <ctrl/ctrlb069.h>
#include <ctrl/ctrlc365.h>
#include <ctrl/ctrlc369.h>
#include <ctrl/ctrlc36f.h>
#include <ctrl/ctrlcb33.h>

#include <ampere/ga100/dev_runlist.h>
#include <containers/queue.h>
#include <core/locks.h>
#include <gpu/bus/kern_bus.h>
#include <gpu/device/device.h>
#include <gpu/gpu.h>
#include <gpu/mem_mgr/heap.h>
#include <gpu/mem_mgr/mem_mgr.h>
#include <gpu/mem_mgr/virt_mem_allocator.h>
#include <gpu/mem_sys/kern_mem_sys.h>
#include <gpu/mmu/kern_gmmu.h>
#include <gpu/subdevice/subdevice.h>
#include <gpu_mgr/gpu_mgr.h>
#include <kernel/gpu/fifo/kernel_channel.h>
#include <kernel/gpu/fifo/kernel_channel_group.h>
#include <kernel/gpu/fifo/kernel_channel_group_api.h>
#include <kernel/gpu/fifo/kernel_ctxshare.h>
#include <kernel/gpu/gr/kernel_graphics.h>
#include <kernel/gpu/mig_mgr/gpu_instance_subscription.h>
#include <kernel/gpu/mig_mgr/kernel_mig_manager.h>
#include <kernel/gpu/nvlink/kernel_nvlink.h>
#include <mem_mgr/fabric_vaspace.h>
#include <mem_mgr/fla_mem.h>
#include <mem_mgr/gpu_vaspace.h>
#include <mem_mgr/vaspace.h>
#include <mmu/gmmu_fmt.h>
#include <nv_uvm_types.h>
#include <objrpc.h>
#include <os/os.h>
#include <resserv/rs_client.h>
#include <rmapi/client.h>
#include <rmapi/nv_gpu_ops.h>
#include <rmapi/rs_utils.h>
#include <turing/tu102/dev_vm.h>
#include <gpu/mem_mgr/vaspace_api.h>
#include <vgpu/rpc.h>

#include <maxwell/gm107/dev_timer.h>
#include <pascal/gp100/dev_mmu.h>

#include <kernel/gpu/conf_compute/ccsl.h>

#define NV_GPU_OPS_NUM_GPFIFO_ENTRIES_DEFAULT 1024
#define NV_GPU_SMALL_PAGESIZE (4 * 1024)

#define PAGE_SIZE_DEFAULT UVM_PAGE_SIZE_DEFAULT

typedef struct
{
    NODE btreeNode;
    NvU64 address;
    NvHandle handle;
    NvU64 size;
    // childHandle tightly couples a physical allocation with a VA memdesc.
    // A VA memdesc is considered a parent memdesc, i.e. its childHandle is
    // non-zero (valid).
    // - If childHandle is non-zero, there is a corresponding PA allocation present.
    // - If childHandle is zero, this is an invalid state for a VA memdesc.
    NvHandle childHandle;
} gpuMemDesc;

typedef struct
{
    NvU64 pageSize;  // Default is 4K or 64K; otherwise pageSize = 2M.
    NvU64 alignment;
} gpuVaAllocInfo;

typedef struct
{
    NODE btreeNode;
    NvU64 cpuPointer;
    NvHandle handle;
} cpuMappingDesc;

typedef struct
{
    NODE btreeNode;
    PORT_RWLOCK *btreeLock;
    NvHandle deviceHandle;
    PNODE subDevices;
    NvU32 subDeviceCount;
    NvU32 arch;
    NvU32 implementation;
} deviceDesc;

typedef struct
{
    NODE btreeNode;
    NvHandle subDeviceHandle;
    NvU64 refCount;
    struct
    {
        NvHandle handle;

        // Pointer to the SMC partition information. It is used as a flag to
        // indicate that the SMC information has been initialized.
        KERNEL_MIG_GPU_INSTANCE *info;
    } smcPartition;
    NvU32 eccOffset;
    NvU32 eccMask;
    void *eccReadLocation;
    NvHandle eccMasterHandle;
    NvHandle eccCallbackHandle;
    NvBool bEccInitialized;
    NvBool bEccEnabled;
    NvBool eccErrorNotifier;
    NVOS10_EVENT_KERNEL_CALLBACK_EX eccDbeCallback;

    // The below is used for controlling channel(s) in the GPU.
    // Example: Volta maps the doorbell work submission register in this
    // region.
    NvHandle clientRegionHandle;
    volatile void *clientRegionMapping;
} subDeviceDesc;

struct gpuSession
{
    NvHandle handle;
    PNODE devices;
    PORT_RWLOCK *btreeLock;
};


MAKE_MAP(MemdescMap, PMEMORY_DESCRIPTOR);

struct gpuDevice
{
    deviceDesc *rmDevice;
    subDeviceDesc *rmSubDevice;

    // same as rmDevice->deviceHandle
    NvHandle handle;

    // same as rmSubDevice->subDeviceHandle
    NvHandle subhandle;

    NvU32 deviceInstance;
    NvU32 subdeviceInstance;
    NvU32 gpuId;

    // TODO: Bug 3906861: The info struct contains many of these fields. Find
    // and remove the redundant fields from this top level.
    NvU32 hostClass;
    NvU32 ceClass;
    NvU32 sec2Class;
    NvU32 computeClass;
    NvU32 faultBufferClass;
    NvU32 accessCounterBufferClass;
    NvBool isTccMode;
    NvBool isWddmMode;
    struct gpuSession *session;
    NvU8 gpuUUID[NV_GPU_UUID_LEN];
    gpuFbInfo fbInfo;
    gpuInfo info;

    MemdescMap kern2PhysDescrMap;

    PORT_MUTEX *pPagingChannelRpcMutex;
};

struct gpuAddressSpace
{
    NvHandle handle;
    struct gpuDevice *device;
    PNODE allocations;
    PORT_RWLOCK *allocationsLock;
    PNODE cpuMappings;
    PORT_RWLOCK *cpuMappingsLock;
    PNODE physAllocations;
    PORT_RWLOCK *physAllocationsLock;
    NvU64 vaBase;
    NvU64 vaSize;

    // Dummy BAR1 allocation required on PCIe systems when GPPut resides in
    // sysmem.
    struct
    {
        NvU64 refCount;
        NvU64 gpuAddr;
        volatile void *cpuAddr;
    } dummyGpuAlloc;
};

struct gpuTsg
{
    NvHandle tsgHandle;
    struct gpuAddressSpace *vaSpace;
    UVM_GPU_CHANNEL_ENGINE_TYPE engineType;

    // Index of the engine the TSG is bound to.
    // Ignored if engineType is anything other than
    // UVM_GPU_CHANNEL_ENGINE_TYPE_CE.
    NvU32 engineIndex;

    // True when the GPU does not support TSG for the engineType.
    NvBool isFakeTsg;
};

struct gpuChannel
{
    const struct gpuTsg *tsg;
    NvHandle channelHandle;
    NvHandle engineHandle;
    NvU32 hwRunlistId;
    NvU32 hwChannelId;
    NvU64 gpFifo;
    NvNotification *errorNotifier;
    NvU64 errorNotifierOffset;
    NvU64 *gpFifoEntries;
    NvU32 fifoEntries;
    KeplerAControlGPFifo *controlPage;
    NvHandle hFaultCancelSwMethodClass;
    volatile unsigned *workSubmissionOffset;
    NvU32 workSubmissionToken;
    volatile NvU32 *pWorkSubmissionToken;
    NvHandle hUserdPhysHandle;
    NvU64 userdGpuAddr;
    UVM_BUFFER_LOCATION gpFifoLoc;
    UVM_BUFFER_LOCATION gpPutLoc;
    NvBool retainedDummyAlloc;
    NvBool bClientRegionGpuMappingNeeded;
    NvU64 clientRegionGpuAddr;
};

// Add 3 to include the local ctx buffer, patch context buffer and PM ctxsw buffer
ct_assert(UVM_GPU_CHANNEL_MAX_RESOURCES >= (GR_GLOBALCTX_BUFFER_COUNT + 3));

// A retained channel is a user client's channel which has been registered with
// the UVM driver.
struct gpuRetainedChannel_struct
{
    struct gpuDevice *device;
    deviceDesc *rmDevice;
    subDeviceDesc *rmSubDevice;
    struct gpuSession *session;
    OBJGPU *pGpu;
    MEMORY_DESCRIPTOR *instanceMemDesc;
    MEMORY_DESCRIPTOR *resourceMemDesc[UVM_GPU_CHANNEL_MAX_RESOURCES];
    UVM_GPU_CHANNEL_ENGINE_TYPE channelEngineType;
    NvU32 resourceCount;
    NvU32 chId;
    NvU32 runlistId;
    NvU32 grIdx;

    // Dup of user's TSG (if one exists) under our RM client
    NvHandle hDupTsg;

    // Dup to context share object
    NvHandle hDupKernelCtxShare;

    // Handle for object that retains chId and instance mem
    NvHandle hChannelRetainer;
};

struct allocFlags
{
    NvBool bGetKernelVA;
    NvBool bfixedAddressAllocate;
};

struct ChannelAllocInfo
{
    NV_CHANNEL_ALLOC_PARAMS gpFifoAllocParams;
    gpuAllocInfo gpuAllocInfo;
};

struct systemP2PCaps
{
    // peerId[i] contains gpu[i]'s peer id of gpu[1 - i]
    NvU32 peerIds[2];

    // true if the two GPUs are direct NvLink or PCIe peers
    NvU32 accessSupported : 1;

    // true if the two GPUs are indirect (NvLink) peers
    NvU32 indirectAccessSupported : 1;

    // true if the two GPUs are direct NvLink peers
    NvU32 nvlinkSupported : 1;

    NvU32 atomicSupported : 1;

    // optimalNvlinkWriteCEs[i] contains the index of the optimal CE to use when
    // writing from gpu[i] to gpu[1 - i]
    NvU32 optimalNvlinkWriteCEs[2];
};

static NV_STATUS findUvmAddressSpace(NvHandle hClient, NvU32 gpuInstance, NvHandle *pHandle, OBJVASPACE **ppVaspace);
static NV_STATUS nvGpuOpsGpuMalloc(struct gpuAddressSpace *vaSpace,
                                   NvBool isSystemMemory,
                                   NvLength length,
                                   NvU64 *gpuOffset,
                                   struct allocFlags flags,
                                   gpuAllocInfo *allocInfo);
static NV_STATUS trackDescriptor(PNODE *pRoot, NvU64 key, void *desc);
static NV_STATUS findDescriptor(PNODE pRoot, NvU64 key, void **desc);
static NV_STATUS deleteDescriptor(PNODE *pRoot, NvU64 key, void **desc);
static NV_STATUS destroyAllGpuMemDescriptors(NvHandle hClient, PNODE pNode);
static NV_STATUS getHandleForVirtualAddr(struct gpuAddressSpace *vaSpace,
                                         NvU64 allocationVa,
                                         NvBool bPhysical,
                                         NvHandle *pHandle);
static NV_STATUS findDeviceClasses(NvHandle hRoot,
                                   NvHandle hDevice,
                                   NvHandle hSubdevice,
                                   NvU32 *hostClass,
                                   NvU32 *ceClass,
                                   NvU32 *computeClass,
                                   NvU32 *faultBufferClass,
                                   NvU32 *accessCounterBufferClass,
                                   NvU32 *sec2Class);
static NV_STATUS queryCopyEngines(struct gpuDevice *gpu, gpuCesCaps *cesCaps);
static void nvGpuOpsFreeVirtual(struct gpuAddressSpace *vaSpace,
                                NvU64 vaOffset);
static NvBool isDeviceVoltaPlus(const struct gpuDevice *device);
static NvBool isDeviceTuringPlus(const struct gpuDevice *device);
static NV_STATUS gpuDeviceMapUsermodeRegion(struct gpuDevice *device);
static void gpuDeviceDestroyUsermodeRegion(struct gpuDevice *device);
static void gpuDeviceUnmapCpuFreeHandle(struct gpuDevice *device,
                                        NvHandle handle,
                                        void *ptr,
                                        NvU32 flags);
static NV_STATUS allocNvlinkStatus(NvHandle hClient,
                                   NvHandle hSubDevice,
                                   NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS **nvlinkStatusOut);
static NvU32 getNvlinkConnectionToNpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
                                      NvBool *atomicSupported,
                                      NvU32 *linkBandwidthMBps);
static NvU32 getNvlinkConnectionToSwitch(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
                                         NvU32 *linkBandwidthMBps);
static NV_STATUS nvGpuOpsGetMemoryByHandle(NvHandle hClient, NvHandle hMemory, Memory **ppMemory);
static void _nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel);
static NV_STATUS _nvGpuOpsRetainChannelResources(struct gpuDevice *device,
                                                 NvHandle hClient,
                                                 NvHandle hKernelChannel,
                                                 gpuRetainedChannel *retainedChannel,
                                                 gpuChannelInstanceInfo *channelInstanceInfo);
static void _nvGpuOpsReleaseChannelResources(gpuRetainedChannel *retainedChannel);

static NV_STATUS
nvGpuOpsQueryGpuConfidentialComputeCaps(NvHandle hClient,
                                        UvmGpuConfComputeCaps *pGpuConfComputeCaps);

/*
 * This function locks the RM API lock according to rmApiLockFlags, and then
 * examines numLocksNeeded. If this is 0, no GPU locks will be acquired. If it
 * is 1, the GPU lock for deviceInstance1 will be taken. If it is 2, both GPU
 * locks for deviceInstance1 and deviceInstance2 will be taken. If it is any
 * other number, all the GPU locks will be acquired.
 *
 * This function attempts to grab the needed GPU locks, and writes the
 * resulting mask into acquiredLocks->gpuMask. In the event of a failure to
 * acquire any needed GPU locks, the written mask is 0 and the function returns
 * NV_ERR_INVALID_LOCK_STATE. In this case, all locks held are released and the
 * caller does not need to release any locks.
 */

typedef struct nvGpuOpsLockSet
{
    NvBool isRmLockAcquired;
    NvBool isRmSemaAcquired;
    GPU_MASK gpuMask;
    RsClient *pClientLocked;
} nvGpuOpsLockSet;

static void _nvGpuOpsLocksRelease(nvGpuOpsLockSet *acquiredLocks)
{
    OBJSYS *pSys;
    pSys = SYS_GET_INSTANCE();

    if (acquiredLocks->gpuMask != 0)
    {
        rmGpuGroupLockRelease(acquiredLocks->gpuMask, GPUS_LOCK_FLAGS_NONE);
        acquiredLocks->gpuMask = 0;
    }

    if (acquiredLocks->pClientLocked != NULL)
    {
        serverReleaseClient(&g_resServ, LOCK_ACCESS_WRITE, acquiredLocks->pClientLocked);
        acquiredLocks->pClientLocked = NULL;
    }

    if (acquiredLocks->isRmLockAcquired == NV_TRUE)
    {
        rmapiLockRelease();
        acquiredLocks->isRmLockAcquired = NV_FALSE;
    }

    if (acquiredLocks->isRmSemaAcquired == NV_TRUE)
    {
        osReleaseRmSema(pSys->pSema, NULL);
        acquiredLocks->isRmSemaAcquired = NV_FALSE;
    }
}

static NV_STATUS _nvGpuOpsLocksAcquire(NvU32 rmApiLockFlags,
                                       NvHandle hClient,
                                       RsClient **ppClient,
                                       NvU32 numLocksNeeded,
                                       NvU32 deviceInstance1,
                                       NvU32 deviceInstance2,
                                       nvGpuOpsLockSet *acquiredLocks)
{
    NV_STATUS status;
    OBJSYS *pSys;
    GPU_MASK gpuMaskRequested;
    GPU_MASK gpuMaskAcquired;

    acquiredLocks->isRmSemaAcquired = NV_FALSE;
    acquiredLocks->isRmLockAcquired = NV_FALSE;
    acquiredLocks->gpuMask = 0;
    acquiredLocks->pClientLocked = NULL;

    pSys = SYS_GET_INSTANCE();
    if (pSys == NULL)
    {
        return NV_ERR_GENERIC;
    }

    status = osAcquireRmSema(pSys->pSema);
    if (status != NV_OK)
    {
        return status;
    }
    acquiredLocks->isRmSemaAcquired = NV_TRUE;

    status = rmapiLockAcquire(rmApiLockFlags, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(acquiredLocks);
        return status;
    }
    acquiredLocks->isRmLockAcquired = NV_TRUE;

    if (hClient != NV01_NULL_OBJECT)
    {
        status = serverAcquireClient(&g_resServ, hClient, LOCK_ACCESS_WRITE, &acquiredLocks->pClientLocked);

        if (status != NV_OK)
        {
            _nvGpuOpsLocksRelease(acquiredLocks);
            return status;
        }

        if (ppClient != NULL)
            *ppClient = acquiredLocks->pClientLocked;
    }

    //
    // Determine the GPU lock mask we need. If we are asked for 0, 1, or 2 locks
    // then we should use neither, just the first, or both deviceInstance
    // parameters, respectively. If any other number of locks is requested, we
    // acquire all of the lockable GPUs.
    //
    // We cannot simply determine the mask outside of this function and pass in
    // the mask, because gpumgrGetDeviceGpuMask requires that we hold the RM API
    // lock. Otherwise, SLI rewiring could preempt lock acquisition and render
    // the mask invalid.
    //
    gpuMaskRequested = 0;

    if (numLocksNeeded > 2)
    {
        gpuMaskRequested = GPUS_LOCK_ALL;
    }
    else
    {
        if (numLocksNeeded > 0)
        {
            gpuMaskRequested |= gpumgrGetDeviceGpuMask(deviceInstance1);
        }

        if (numLocksNeeded > 1)
        {
            gpuMaskRequested |= gpumgrGetDeviceGpuMask(deviceInstance2);
        }
    }

    //
    // The gpuMask parameter to rmGpuGroupLockAcquire is both input and output,
    // so we have to copy in what we want here to make comparisons later.
    //
    gpuMaskAcquired = gpuMaskRequested;
    if (gpuMaskRequested != 0)
    {
        status = rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_MASK,
                                       GPUS_LOCK_FLAGS_NONE,
                                       RM_LOCK_MODULES_GPU_OPS, &gpuMaskAcquired);
    }
    acquiredLocks->gpuMask = gpuMaskAcquired;

    //
    // If we cannot acquire all the locks requested, we release all the locks
    // we *were* able to get and bail out here. There is never a safe way to
    // proceed with a GPU ops function with fewer locks than requested. If there
    // was a safe way to proceed, the client should have asked for fewer locks
    // in the first place.
    //
    // That said, callers sometimes want "all available GPUs", and then the call
    // to rmGpuGroupLockAcquire will mask off invalid GPUs for us. Hence the
    // exception for GPUS_LOCK_ALL.
    //
    if (gpuMaskAcquired != gpuMaskRequested && gpuMaskRequested != GPUS_LOCK_ALL)
    {
        status = NV_ERR_INVALID_LOCK_STATE;
    }

    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(acquiredLocks);
    }

    return status;
}

static NV_STATUS _nvGpuOpsLocksAcquireAll(NvU32 rmApiLockFlags,
                                          NvHandle hClient, RsClient **ppClient,
                                          nvGpuOpsLockSet *acquiredLocks)
{
    return _nvGpuOpsLocksAcquire(rmApiLockFlags, hClient, ppClient, 3, 0, 0, acquiredLocks);
}
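//
// Illustrative (non-compiled) sketch of the acquire/release contract
// documented above: callers treat nvGpuOpsLockSet as opaque and release via
// _nvGpuOpsLocksRelease() on success, while a failed acquire cleans up after
// itself so the caller releases nothing. exampleLockedOp is hypothetical.
//
#if 0
static NV_STATUS exampleLockedOp(NvHandle hClient, NvU32 deviceInstance)
{
    nvGpuOpsLockSet acquiredLocks;
    NV_STATUS status;

    // One GPU lock needed, so deviceInstance2 is ignored.
    status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_READ, hClient, NULL, 1,
                                   deviceInstance, 0, &acquiredLocks);
    if (status != NV_OK)
        return status; // Per the contract, nothing is held on failure.

    // ... operate under the RM semaphore, RM API lock and GPU lock ...

    _nvGpuOpsLocksRelease(&acquiredLocks);
    return NV_OK;
}
#endif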
static NV_STATUS nvGpuOpsCreateClient(RM_API *pRmApi, NvHandle *hClient)
{
    NV_STATUS status;
    RS_SHARE_POLICY sharePolicy;

    *hClient = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                           hClient, NV01_ROOT, hClient, sizeof(*hClient));
    if (status != NV_OK)
    {
        return status;
    }

    // Override the default system share policy. Prohibit sharing of any and
    // all objects owned by this client.
    portMemSet(&sharePolicy, 0, sizeof(sharePolicy));
    sharePolicy.type = RS_SHARE_TYPE_ALL;
    sharePolicy.action = RS_SHARE_ACTION_FLAG_REVOKE;
    RS_ACCESS_MASK_ADD(&sharePolicy.accessMask, RS_ACCESS_DUP_OBJECT);

    status = pRmApi->Share(pRmApi, *hClient, *hClient, &sharePolicy);
    if (status != NV_OK)
    {
        pRmApi->Free(pRmApi, *hClient, *hClient);
    }

    return status;
}

NV_STATUS nvGpuOpsCreateSession(struct gpuSession **session)
{
    struct gpuSession *gpuSession = NULL;
    NV_STATUS status;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();

    gpuSession = portMemAllocNonPaged(sizeof(*gpuSession));
    if (gpuSession == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(gpuSession, 0, sizeof(*gpuSession));

    status = nvGpuOpsCreateClient(pRmApi, &gpuSession->handle);
    if (status != NV_OK)
    {
        portMemFree(gpuSession);
        return status;
    }

    gpuSession->devices = NULL;
    gpuSession->btreeLock = portSyncRwLockCreate(pAlloc);
    *session = gpuSession;
    return status;
}

NV_STATUS nvGpuOpsDestroySession(struct gpuSession *session)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!session)
        return NV_OK;

    // Sanity check: there should be no devices still attached to the session!
    NV_ASSERT(!session->devices);

    // Freeing the session's client frees everything under it.
    pRmApi->Free(pRmApi, session->handle, session->handle);
    portSyncRwLockDestroy(session->btreeLock);
    portMemFree(session);
    return NV_OK;
}
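//
// Illustrative (non-compiled) sketch of the session lifecycle implemented
// above: a session wraps one RM client with sharing revoked, and is destroyed
// only after all of its devices have been detached.
//
#if 0
static void exampleSessionLifecycle(void)
{
    struct gpuSession *session = NULL;

    if (nvGpuOpsCreateSession(&session) != NV_OK)
        return;

    // ... create and destroy gpuDevice objects under this session ...

    // Frees the session's RM client; any object still parented to it would be
    // freed along with it, which is why the destroy path asserts that no
    // devices remain attached.
    nvGpuOpsDestroySession(session);
}
#endif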
static void *gpuBar0BaseAddress(OBJGPU *pGpu)
{
    DEVICE_MAPPING *pMapping = gpuGetDeviceMapping(pGpu, DEVICE_INDEX_GPU, 0);

    NV_ASSERT(pMapping);

    return pMapping->gpuNvAddr;
}

static void eccErrorCallback(void *pArg, void *pData, NvHandle hEvent,
                             NvU32 data, NvU32 status)
{
    subDeviceDesc *rmSubDevice = (subDeviceDesc *)pArg;

    NV_ASSERT(rmSubDevice);

    rmSubDevice->eccErrorNotifier = NV_TRUE;
}

static NvBool deviceNeedsDummyAlloc(struct gpuDevice *device)
{
    // The dummy mapping is needed so the client can issue a read to flush out
    // any CPU BAR1 PCIE writes prior to updating GPPUT. This is only needed
    // when the bus is non-coherent and when not in ZeroFB (where there can't be
    // any BAR1 mappings).
    return device->info.sysmemLink < UVM_LINK_TYPE_NVLINK_2 && !device->fbInfo.bZeroFb;
}

static NV_STATUS nvGpuOpsVaSpaceRetainDummyAlloc(struct gpuAddressSpace *vaSpace)
{
    struct gpuDevice *device;
    NV_STATUS status = NV_OK;
    gpuAllocInfo allocInfo = {0};
    struct allocFlags flags = {0};

    device = vaSpace->device;
    NV_ASSERT(device);
    NV_ASSERT(deviceNeedsDummyAlloc(device));

    if (vaSpace->dummyGpuAlloc.refCount > 0)
        goto done;

    // When HCC is enabled, the allocation happens in CPR vidmem.
    // The dummy BAR1 pointer read mechanism won't work when
    // BAR1 access to CPR vidmem is sealed off as part of HCC
    // production settings. Creating the dummy BAR1 mapping can
    // also be avoided when the doorbell is in BAR1.

    flags.bGetKernelVA = NV_FALSE;
    status = nvGpuOpsGpuMalloc(vaSpace,
                               NV_FALSE,
                               NV_GPU_SMALL_PAGESIZE,
                               &vaSpace->dummyGpuAlloc.gpuAddr,
                               flags,
                               &allocInfo);
    if (status != NV_OK)
        return status;

    status = nvGpuOpsMemoryCpuMap(vaSpace,
                                  vaSpace->dummyGpuAlloc.gpuAddr,
                                  NV_GPU_SMALL_PAGESIZE,
                                  (void **)&vaSpace->dummyGpuAlloc.cpuAddr,
                                  PAGE_SIZE_DEFAULT);
    if (status != NV_OK)
        nvGpuOpsMemoryFree(vaSpace, vaSpace->dummyGpuAlloc.gpuAddr);

done:
    if (status == NV_OK)
    {
        ++vaSpace->dummyGpuAlloc.refCount;
        NV_ASSERT(vaSpace->dummyGpuAlloc.gpuAddr);
        NV_ASSERT(vaSpace->dummyGpuAlloc.cpuAddr);
    }

    return status;
}

static void nvGpuOpsVaSpaceReleaseDummyAlloc(struct gpuAddressSpace *vaSpace)
{
    NV_ASSERT(deviceNeedsDummyAlloc(vaSpace->device));
    NV_ASSERT(vaSpace->dummyGpuAlloc.refCount != 0);

    if (--vaSpace->dummyGpuAlloc.refCount > 0)
        return;

    if (vaSpace->dummyGpuAlloc.cpuAddr)
        nvGpuOpsMemoryCpuUnMap(vaSpace, (void *)vaSpace->dummyGpuAlloc.cpuAddr);

    if (vaSpace->dummyGpuAlloc.gpuAddr)
        nvGpuOpsMemoryFree(vaSpace, vaSpace->dummyGpuAlloc.gpuAddr);

    vaSpace->dummyGpuAlloc.cpuAddr = NULL;
    vaSpace->dummyGpuAlloc.gpuAddr = 0;
}
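//
// Illustrative (non-compiled) sketch of why the dummy BAR1 mapping above
// exists: on a non-coherent bus, a read through BAR1 flushes previously
// posted CPU writes before GPPut is published. The function and parameter
// names below (examplePushWork, gpFifoCpuPtr, gpPut) are hypothetical.
//
#if 0
static void examplePushWork(struct gpuAddressSpace *vaSpace,
                            volatile NvU64 *gpFifoCpuPtr,
                            volatile NvU32 *gpPut,
                            NvU64 entry,
                            NvU32 newPut)
{
    // Write the new GPFIFO entry through BAR1.
    gpFifoCpuPtr[*gpPut] = entry;

    // Read back through the dummy BAR1 mapping to flush the posted PCIe
    // writes above, then publish the new GPPut value.
    if (deviceNeedsDummyAlloc(vaSpace->device))
        (void)*(volatile NvU32 *)vaSpace->dummyGpuAlloc.cpuAddr;

    *gpPut = newPut;
}
#endif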
static NV_STATUS nvGpuOpsDisableVaSpaceChannels(struct gpuAddressSpace *vaSpace)
{
    NV_STATUS status = NV_OK;
    OBJVASPACE *pVAS = NULL;
    Device *pDevice;
    RsClient *pClient;
    RS_ORDERED_ITERATOR it;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV2080_CTRL_FIFO_DISABLE_CHANNELS_PARAMS disableParams = {0};

    if (vaSpace == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
    if (status != NV_OK)
        return status;

    status = deviceGetByHandle(pClient, vaSpace->device->handle, &pDevice);
    if (status != NV_OK)
        return status;

    GPU_RES_SET_THREAD_BC_STATE(pDevice);

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
        return NV_ERR_INVALID_ARGUMENT;

    // Stop all channels under the VAS, but leave them bound.
    it = kchannelGetIter(pClient, RES_GET_REF(pDevice));
    while (clientRefOrderedIterNext(pClient, &it))
    {
        KernelChannel *pKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);

        NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);
        if (pKernelChannel->pVAS != pVAS)
            continue;

        NV_ASSERT_OR_RETURN(disableParams.numChannels < NV2080_CTRL_FIFO_DISABLE_CHANNELS_MAX_ENTRIES, NV_ERR_NOT_SUPPORTED);
        disableParams.hClientList[disableParams.numChannels] = RES_GET_CLIENT_HANDLE(pKernelChannel);
        disableParams.hChannelList[disableParams.numChannels] = RES_GET_HANDLE(pKernelChannel);
        disableParams.numChannels++;
    }

    if (disableParams.numChannels == 0)
        return status;

    disableParams.bDisable = NV2080_CTRL_FIFO_DISABLE_CHANNEL_TRUE;
    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->subhandle,
                             NV2080_CTRL_CMD_FIFO_DISABLE_CHANNELS,
                             &disableParams,
                             sizeof(disableParams));
    return status;
}

static NV_STATUS nvGpuOpsEnableVaSpaceChannels(struct gpuAddressSpace *vaSpace)
{
    NV_STATUS status = NV_OK;
    OBJVASPACE *pVAS = NULL;
    Device *pDevice;
    RsClient *pClient;
    RS_ORDERED_ITERATOR it;
    NV2080_CTRL_FIFO_DISABLE_CHANNELS_PARAMS disableParams = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (vaSpace == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
    if (status != NV_OK)
        return status;

    status = deviceGetByHandle(pClient, vaSpace->device->handle, &pDevice);
    if (status != NV_OK)
        return status;

    GPU_RES_SET_THREAD_BC_STATE(pDevice);

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
        return NV_ERR_INVALID_ARGUMENT;

    it = kchannelGetIter(pClient, RES_GET_REF(pDevice));
    while (clientRefOrderedIterNext(pClient, &it))
    {
        KernelChannel *pKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);

        NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);
        if (pKernelChannel->pVAS != pVAS)
            continue;

        NV_ASSERT_OR_RETURN(disableParams.numChannels < NV2080_CTRL_FIFO_DISABLE_CHANNELS_MAX_ENTRIES, NV_ERR_NOT_SUPPORTED);
        disableParams.hClientList[disableParams.numChannels] = RES_GET_CLIENT_HANDLE(pKernelChannel);
        disableParams.hChannelList[disableParams.numChannels] = RES_GET_HANDLE(pKernelChannel);
        disableParams.numChannels++;
    }

    if (disableParams.numChannels == 0)
        return status;

    disableParams.bDisable = NV2080_CTRL_FIFO_DISABLE_CHANNEL_FALSE;
    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->subhandle,
                             NV2080_CTRL_CMD_FIFO_DISABLE_CHANNELS,
                             &disableParams,
                             sizeof(disableParams));
    return status;
}
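//
// Illustrative (non-compiled) sketch of how the two helpers above are meant
// to be paired: quiesce every channel bound to the VA space, perform an
// update that cannot tolerate in-flight work, then re-enable the channels.
// exampleUpdateVaSpace is a hypothetical caller.
//
#if 0
static NV_STATUS exampleUpdateVaSpace(struct gpuAddressSpace *vaSpace)
{
    NV_STATUS status = nvGpuOpsDisableVaSpaceChannels(vaSpace);
    if (status != NV_OK)
        return status;

    // ... modify state the channels depend on (e.g. page directory) ...

    return nvGpuOpsEnableVaSpaceChannels(vaSpace);
}
#endif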
static NV_STATUS nvGpuOpsRmDeviceCreate(struct gpuDevice *device)
{
    NV_STATUS status;
    NV0080_ALLOC_PARAMETERS nv0080AllocParams = { 0 };
    deviceDesc *rmDevice = NULL;
    struct gpuSession *session = device->session;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
    OBJGPU *pGpu;

    // Find the existing rmDevice.
    // Otherwise, allocate an rmDevice.
    portSyncRwLockAcquireRead(session->btreeLock);
    status = findDescriptor(session->devices, device->deviceInstance, (void**)&rmDevice);
    portSyncRwLockReleaseRead(session->btreeLock);
    if (status == NV_OK)
    {
        NV_ASSERT(rmDevice);
        device->rmDevice = rmDevice;
        device->handle = rmDevice->deviceHandle;
        return NV_OK;
    }

    rmDevice = portMemAllocNonPaged(sizeof(*rmDevice));
    if (rmDevice == NULL)
        return NV_ERR_INSUFFICIENT_RESOURCES;

    portMemSet(rmDevice, 0, sizeof(*rmDevice));

    nv0080AllocParams.deviceId = device->deviceInstance;
    nv0080AllocParams.hClientShare = session->handle;
    device->handle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           session->handle,
                           &device->handle,
                           NV01_DEVICE_0,
                           &nv0080AllocParams,
                           sizeof(nv0080AllocParams));
    if (status != NV_OK)
        goto cleanup_device_desc;

    device->rmDevice = rmDevice;
    rmDevice->deviceHandle = device->handle;
    rmDevice->subDevices = NULL;
    rmDevice->subDeviceCount = 0;

    portSyncRwLockAcquireWrite(session->btreeLock);
    status = trackDescriptor(&session->devices, device->deviceInstance, rmDevice);
    portSyncRwLockReleaseWrite(session->btreeLock);
    if (status != NV_OK)
        goto cleanup_device;

    // TODO: Acquired because CliGetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        goto cleanup_device;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        goto cleanup_device;

    rmDevice->arch = gpuGetChipArch(pGpu);
    rmDevice->implementation = gpuGetChipImpl(pGpu);
    rmDevice->btreeLock = portSyncRwLockCreate(pAlloc);

    return NV_OK;

cleanup_device:
    pRmApi->Free(pRmApi, session->handle, device->handle);
cleanup_device_desc:
    portMemFree(rmDevice);
    return status;
}

static void nvGpuOpsRmDeviceDestroy(struct gpuDevice *device)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    deviceDesc *rmDevice = device->rmDevice;

    NV_ASSERT(rmDevice != NULL);

    if (rmDevice->subDeviceCount == 0)
    {
        struct gpuSession *session = device->session;

        portSyncRwLockAcquireWrite(session->btreeLock);
        deleteDescriptor(&session->devices, device->deviceInstance, (void**)&rmDevice);
        pRmApi->Free(pRmApi, session->handle, rmDevice->deviceHandle);
        portSyncRwLockDestroy(rmDevice->btreeLock);
        portMemFree(rmDevice);
        portSyncRwLockReleaseWrite(session->btreeLock);
    }
}
static void gpuDeviceRmSubDeviceDeinitEcc(struct gpuDevice *device)
{
    NV2080_CTRL_EVENT_SET_NOTIFICATION_PARAMS eventDbeParams = {0};
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!rmSubDevice->bEccInitialized || !rmSubDevice->bEccEnabled)
        return;

    // Disable all notifications specific to ECC on this device
    eventDbeParams.event = NV2080_NOTIFIERS_ECC_DBE;
    eventDbeParams.action = NV2080_CTRL_EVENT_SET_NOTIFICATION_ACTION_DISABLE;

    pRmApi->Control(pRmApi,
                    device->session->handle,
                    device->subhandle,
                    NV2080_CTRL_CMD_EVENT_SET_NOTIFICATION,
                    (void *)&eventDbeParams,
                    sizeof(eventDbeParams));

    if (!isDeviceTuringPlus(device))
    {
        gpuDeviceUnmapCpuFreeHandle(device,
                                    rmSubDevice->eccMasterHandle,
                                    rmSubDevice->eccReadLocation,
                                    DRF_DEF(OS33, _FLAGS, _ACCESS, _READ_ONLY));
    }

    rmSubDevice->eccReadLocation = NULL;

    if (rmSubDevice->eccCallbackHandle)
        pRmApi->Free(pRmApi, device->session->handle, rmSubDevice->eccCallbackHandle);

    rmSubDevice->bEccEnabled = NV_FALSE;
    rmSubDevice->bEccInitialized = NV_FALSE;
}

//
// Initialize the ECC state for an RM subdevice
//
// This can only be done once per RM subdevice as GF100_SUBDEVICE_MASTER can
// only be allocated once.
//
static NV_STATUS gpuDeviceRmSubDeviceInitEcc(struct gpuDevice *device)
{
    NV_STATUS status = NV_OK;
    NvU32 i = 0;
    int tempPtr = 0;

    struct
    {
        NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS eccStatus;
        NV90E6_CTRL_MASTER_GET_ECC_INTR_OFFSET_MASK_PARAMS eccMask;
        NV90E6_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK_PARAMS errContIntrMask;
        NV2080_CTRL_EVENT_SET_NOTIFICATION_PARAMS eventDbe;
        NV0005_ALLOC_PARAMETERS allocDbe;
    } *pParams = NULL;
    OBJGPU *pGpu = NULL;
    NvBool supportedOnAnyUnits = NV_FALSE;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(device);

    // TODO: Acquired because CliGetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = CliSetGpuContext(device->session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    rmSubDevice->eccOffset = 0;
    rmSubDevice->eccMask = 0;
    rmSubDevice->eccReadLocation = NULL;
    rmSubDevice->eccMasterHandle = 0;
    rmSubDevice->bEccInitialized = NV_FALSE;
    rmSubDevice->bEccEnabled = NV_FALSE;

    // Do not initialize ECC for this device if SMC is enabled, but no partition
    // was subscribed to. This will be the case for select devices created
    // on behalf of the UVM driver.
    if (IS_MIG_IN_USE(pGpu) && rmSubDevice->smcPartition.info == NULL)
        return NV_OK;

    pParams = portMemAllocNonPaged(sizeof(*pParams));
    if (pParams == NULL)
    {
        return NV_ERR_NO_MEMORY;
    }

    portMemSet(pParams, 0, sizeof(*pParams));

    // Check ECC before doing anything here
    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS,
                             &pParams->eccStatus,
                             sizeof(pParams->eccStatus));

    if (status == NV_ERR_NOT_SUPPORTED)
    {
        // Nothing to do if ECC is not supported
        rmSubDevice->bEccEnabled = NV_FALSE;
        status = NV_OK;
        goto done;
    }
    else if (status != NV_OK)
    {
        goto done;
    }

    //
    // ECC is considered supported only if it's enabled for all supported
    // units, and there's at least 1 supported unit.
    //
    rmSubDevice->bEccEnabled = NV_TRUE;

    for (i = 0; i < NV2080_CTRL_GPU_ECC_UNIT_COUNT; i++)
    {
        // Check the ECC status only on the units supported by HW
        if (pParams->eccStatus.units[i].supported)
        {
            supportedOnAnyUnits = NV_TRUE;
            if (!pParams->eccStatus.units[i].enabled)
                rmSubDevice->bEccEnabled = NV_FALSE;
        }
    }

    if (!supportedOnAnyUnits)
        rmSubDevice->bEccEnabled = NV_FALSE;

    if (!rmSubDevice->bEccEnabled)
    {
        // ECC not enabled, early-out
        status = NV_OK;
        goto done;
    }

    // Allocate memory for the interrupt tree
    rmSubDevice->eccMasterHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi, device->session->handle,
                           device->subhandle,
                           &rmSubDevice->eccMasterHandle,
                           GF100_SUBDEVICE_MASTER,
                           &tempPtr,
                           sizeof(tempPtr));
    if (status != NV_OK)
        goto done;

    if (isDeviceTuringPlus(device))
    {
        rmSubDevice->eccReadLocation = gpuBar0BaseAddress(pGpu);
        status = pRmApi->Control(pRmApi,
                                 device->session->handle,
                                 rmSubDevice->eccMasterHandle,
                                 NV90E6_CTRL_CMD_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK,
                                 &pParams->errContIntrMask,
                                 sizeof(pParams->errContIntrMask));
        if (status != NV_OK)
            goto done;

        rmSubDevice->eccOffset = GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_ERR_CONT);
        rmSubDevice->eccMask = pParams->errContIntrMask.eccMask;
    }
    else
    {
        // Map memory
        status = pRmApi->MapToCpu(pRmApi,
                                  device->session->handle,
                                  device->subhandle,
                                  rmSubDevice->eccMasterHandle, 0,
                                  sizeof(GF100MASTERMap),
                                  (void **)(&rmSubDevice->eccReadLocation),
                                  DRF_DEF(OS33, _FLAGS, _ACCESS, _READ_ONLY));
        if (status != NV_OK)
            goto done;

        NV_ASSERT(rmSubDevice->eccReadLocation);

        status = pRmApi->Control(pRmApi,
                                 device->session->handle,
                                 rmSubDevice->eccMasterHandle,
                                 NV90E6_CTRL_CMD_MASTER_GET_ECC_INTR_OFFSET_MASK,
                                 &pParams->eccMask,
                                 sizeof(pParams->eccMask));
        if (status != NV_OK)
            goto done;

        // Fill in the mask and offset read from the control call
        rmSubDevice->eccOffset = pParams->eccMask.offset;
        rmSubDevice->eccMask = pParams->eccMask.mask;
    }

    // Set up the callback for ECC DBE
    rmSubDevice->eccDbeCallback.func = eccErrorCallback;
    rmSubDevice->eccDbeCallback.arg = rmSubDevice;

    pParams->allocDbe.hParentClient = device->session->handle;
    pParams->allocDbe.hClass = NV01_EVENT_KERNEL_CALLBACK_EX;
    pParams->allocDbe.notifyIndex = NV2080_NOTIFIERS_ECC_DBE;
    pParams->allocDbe.data = NV_PTR_TO_NvP64(&rmSubDevice->eccDbeCallback);

    rmSubDevice->eccCallbackHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi, device->session->handle,
                           device->subhandle,
                           &rmSubDevice->eccCallbackHandle,
                           NV01_EVENT_KERNEL_CALLBACK_EX,
                           &pParams->allocDbe,
                           sizeof(pParams->allocDbe));

    if (status != NV_OK)
        goto done;

    pParams->eventDbe.event = NV2080_NOTIFIERS_ECC_DBE;
    pParams->eventDbe.action = NV2080_CTRL_EVENT_SET_NOTIFICATION_ACTION_SINGLE;

    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_EVENT_SET_NOTIFICATION,
                             &pParams->eventDbe,
                             sizeof(pParams->eventDbe));
    if (status != NV_OK)
        goto done;

done:
    portMemFree(pParams);

    if (status == NV_OK)
        rmSubDevice->bEccInitialized = NV_TRUE;
    else
        gpuDeviceRmSubDeviceDeinitEcc(device);

    return status;
}
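//
// Illustrative (non-compiled) sketch of how the ECC state initialized above
// is consumed: eccOffset/eccMask locate the ECC interrupt bit relative to
// eccReadLocation, and eccErrorNotifier is raised by eccErrorCallback() when
// a double-bit-error event fires. exampleEccInterruptPending is hypothetical.
//
#if 0
static NvBool exampleEccInterruptPending(const subDeviceDesc *rmSubDevice)
{
    const volatile NvU32 *reg;

    if (!rmSubDevice->bEccEnabled)
        return NV_FALSE;

    // Read the interrupt word and test the ECC bit(s).
    reg = (const volatile NvU32 *)((const NvU8 *)rmSubDevice->eccReadLocation +
                                   rmSubDevice->eccOffset);
    return (*reg & rmSubDevice->eccMask) != 0;
}
#endif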
static NV_STATUS getSwizzIdFromSmcPartHandle(RM_API *pRmApi,
                                             NvHandle hClient,
                                             NvHandle hGPUInstanceSubscription,
                                             NvU32 *swizzId)
{
    NV_STATUS status;
    RsResourceRef *pSmcResourceRef;
    GPUInstanceSubscription *pGPUInstanceSubscription;

    // get GPUInstanceSubscription handle
    // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = serverutilGetResourceRef(hClient, hGPUInstanceSubscription, &pSmcResourceRef);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    pGPUInstanceSubscription = dynamicCast(pSmcResourceRef->pResource, GPUInstanceSubscription);
    if (!pGPUInstanceSubscription)
        return NV_ERR_INVALID_OBJECT;

    *swizzId = pGPUInstanceSubscription->pKernelMIGGpuInstance->swizzId;

    return NV_OK;
}

//
// Determine an SMC partition's swizzId given a user subscription
//
// This requires temporarily duplicating the handle to validate it, as well
// as to prevent removal of the partition for the duration of the look-up.
// However, neither the partition, nor the swizzId uniquely identifying
// it (within the scope of its parent GPU) are guaranteed to remain valid, and
// callers of this function must be prepared for removal of the partition
// between nvGpuOpsGetGpuInfo() and nvGpuOpsDeviceCreate().
//
static NV_STATUS getSwizzIdFromUserSmcPartHandle(RM_API *pRmApi,
                                                 NvHandle hClient,
                                                 NvHandle hParent,
                                                 NvHandle hUserClient,
                                                 NvHandle hUserGPUInstanceSubscription,
                                                 NvU32 *swizzId)
{
    NV_STATUS status;
    NvHandle dupedGPUInstanceSubscription;

    // TODO: Acquired because serverutilGenResourceHandle expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = serverutilGenResourceHandle(hClient, &dupedGPUInstanceSubscription);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    status = pRmApi->DupObject(pRmApi,
                               hClient,
                               hParent,
                               &dupedGPUInstanceSubscription,
                               hUserClient,
                               hUserGPUInstanceSubscription,
                               NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
    if (status != NV_OK)
        return status;

    // get GPUInstanceSubscription handle
    status = getSwizzIdFromSmcPartHandle(pRmApi, hClient, dupedGPUInstanceSubscription,
                                         swizzId);

    pRmApi->Free(pRmApi, hClient, dupedGPUInstanceSubscription);

    return status;
}

static void nvGpuOpsRmSmcPartitionDestroy(struct gpuDevice *device)
{
    subDeviceDesc *rmSubDevice = device->rmSubDevice;

    if (rmSubDevice->smcPartition.info != NULL)
    {
        RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

        pRmApi->Free(pRmApi,
                     device->session->handle,
                     rmSubDevice->smcPartition.handle);

        rmSubDevice->smcPartition.info = NULL;
    }
}

static NV_STATUS nvGpuOpsRmSmcPartitionCreate(struct gpuDevice *device, const gpuInfo *pGpuInfo)
{
    NV_STATUS status;
    OBJGPU *pGpu = NULL;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    NvHandle dupUserHandle;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    struct gpuSession *session = device->session;
    RsResourceRef *pSmcResourceRef;
    GPUInstanceSubscription *pGPUInstanceSubscription;
    NvU32 swizzId;

    NV_ASSERT(rmSubDevice->smcPartition.info == NULL);

    if (!pGpuInfo->smcEnabled)
        return NV_ERR_INVALID_ARGUMENT;

    // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    // Allocate the SMC partition object

    // SMC GPU partitioning was disabled since we detected the partition in
    // nvGpuOpsGetGpuInfo
    if (!IS_MIG_IN_USE(pGpu))
        return NV_ERR_INVALID_STATE;

    status = pRmApi->DupObject(pRmApi,
                               session->handle,
                               rmSubDevice->subDeviceHandle,
                               &dupUserHandle,
                               pGpuInfo->smcUserClientInfo.hClient,
                               pGpuInfo->smcUserClientInfo.hSmcPartRef,
                               NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
    if (status != NV_OK)
        return status;

    status = getSwizzIdFromSmcPartHandle(pRmApi,
                                         session->handle,
                                         dupUserHandle,
                                         &swizzId);
    if (status != NV_OK)
        goto cleanup_dup_user_handle;

    // The swizzId changed since the call to nvGpuOpsGetGpuInfo: either the
    // object identified by smcUser*Handle changed, or else its configuration
    // was altered.
    if (swizzId != pGpuInfo->smcSwizzId)
    {
        status = NV_ERR_INVALID_STATE;
        goto cleanup_dup_user_handle;
    }

    rmSubDevice->smcPartition.handle = dupUserHandle;

    // get GPUInstanceSubscription handle
    // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        goto cleanup_dup_user_handle;
    status = serverutilGetResourceRef(session->handle, rmSubDevice->smcPartition.handle, &pSmcResourceRef);
    rmapiLockRelease();
    if (status != NV_OK)
        goto cleanup_dup_user_handle;

    pGPUInstanceSubscription = dynamicCast(pSmcResourceRef->pResource, GPUInstanceSubscription);
    NV_ASSERT(pGPUInstanceSubscription != NULL);

    NV_ASSERT(pGPUInstanceSubscription->pKernelMIGGpuInstance->swizzId == pGpuInfo->smcSwizzId);

    rmSubDevice->smcPartition.info = pGPUInstanceSubscription->pKernelMIGGpuInstance;

    return NV_OK;

cleanup_dup_user_handle:
    pRmApi->Free(pRmApi, session->handle, dupUserHandle);

    return status;
}

static NV_STATUS nvGpuOpsRmSubDeviceCreate(struct gpuDevice *device)
{
    NV_STATUS status;
    NV2080_ALLOC_PARAMETERS nv2080AllocParams = { 0 };
    deviceDesc *rmDevice = NULL;
    subDeviceDesc *rmSubDevice = NULL;
    struct gpuSession *session = device->session;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(session);

    // Query the rmDevice, which is needed to create an rmSubDevice.
    portSyncRwLockAcquireRead(session->btreeLock);
    status = findDescriptor(session->devices, device->deviceInstance, (void**)&rmDevice);
    if (status != NV_OK)
    {
        portSyncRwLockReleaseRead(session->btreeLock);
        return status;
    }

    NV_ASSERT(rmDevice);
    NV_ASSERT(rmDevice->deviceHandle == device->handle);

    // Find the existing rmSubDevice.
    // Otherwise, allocate an rmSubDevice.
    portSyncRwLockAcquireWrite(rmDevice->btreeLock);
    if (findDescriptor(rmDevice->subDevices, device->subdeviceInstance, (void**)&rmSubDevice) == NV_OK)
    {
        NV_ASSERT(rmSubDevice);
        device->rmSubDevice = rmSubDevice;
        device->subhandle = rmSubDevice->subDeviceHandle;
        rmSubDevice->refCount++;
        portSyncRwLockReleaseWrite(rmDevice->btreeLock);
        portSyncRwLockReleaseRead(session->btreeLock);
        return NV_OK;
    }

    rmSubDevice = portMemAllocNonPaged(sizeof(*rmSubDevice));
    if (rmSubDevice == NULL)
    {
        // Release both locks before bailing out.
        portSyncRwLockReleaseWrite(rmDevice->btreeLock);
        portSyncRwLockReleaseRead(session->btreeLock);
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    portMemSet(rmSubDevice, 0, sizeof(*rmSubDevice));

    device->rmSubDevice = rmSubDevice;
    rmSubDevice->refCount = 1;
    nv2080AllocParams.subDeviceId = device->subdeviceInstance;
    device->subhandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->handle,
                           &device->subhandle,
                           NV20_SUBDEVICE_0,
                           &nv2080AllocParams,
                           sizeof(nv2080AllocParams));
    if (status != NV_OK)
        goto cleanup_subdevice_desc;
    rmSubDevice->subDeviceHandle = device->subhandle;

    status = trackDescriptor(&rmDevice->subDevices, device->subdeviceInstance, rmSubDevice);
    if (status != NV_OK)
        goto cleanup_subdevice;

    rmDevice->subDeviceCount++;

    portSyncRwLockReleaseWrite(rmDevice->btreeLock);
    portSyncRwLockReleaseRead(session->btreeLock);
    return NV_OK;

cleanup_subdevice:
    pRmApi->Free(pRmApi, session->handle, device->subhandle);
cleanup_subdevice_desc:
    portMemFree(rmSubDevice);
    portSyncRwLockReleaseWrite(rmDevice->btreeLock);
    portSyncRwLockReleaseRead(session->btreeLock);
    return status;
}
static NvBool isDevicePascalPlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_PASCAL;
}

static NvBool isDeviceVoltaPlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_VOLTA;
}

static NvBool isDeviceTuringPlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_TURING;
}

static NvBool isDeviceAmperePlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_AMPERE;
}

// Assume ...->Ampere->Ada->Hopper->...
static NvBool isDeviceHopperPlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return (device->rmDevice->arch >= GPU_ARCHITECTURE_HOPPER) && (device->rmDevice->arch != GPU_ARCHITECTURE_ADA);
}
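//
// Illustrative (non-compiled) sketch of how the helpers above gate feature
// paths, mirroring the timer-register selection done later in this file.
// Note that isDeviceHopperPlus() must exclude Ada explicitly: the
// GPU_ARCHITECTURE_ADA value sorts after GPU_ARCHITECTURE_HOPPER even though
// Ada precedes Hopper in the assumed ...->Ampere->Ada->Hopper->... order.
// exampleSelectTimerSource is hypothetical.
//
#if 0
static NvU32 exampleSelectTimerSource(const struct gpuDevice *device)
{
    if (isDeviceTuringPlus(device))
        return 2; // virtual-function register space
    else if (isDeviceVoltaPlus(device))
        return 1; // usermode (client) region
    else
        return 0; // BAR0 PTIMER registers
}
#endif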
static UVM_LINK_TYPE rmControlToUvmNvlinkVersion(NvU32 nvlinkVersion)
{
    if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
        return UVM_LINK_TYPE_NONE;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0)
        return UVM_LINK_TYPE_NVLINK_1;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_0 ||
             nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_2)
        return UVM_LINK_TYPE_NVLINK_2;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_0 ||
             nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_1)
        return UVM_LINK_TYPE_NVLINK_3;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0)
        return UVM_LINK_TYPE_NVLINK_4;

    NV_ASSERT(0);
    return (NvU32)-1;
}

static NV_STATUS queryFbInfo(struct gpuDevice *device)
{
    NV_STATUS nvStatus = NV_OK;
    NV2080_CTRL_FB_GET_INFO_PARAMS fbInfoParams;
    NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS *fbRegionInfoParams;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NvU32 i;

    struct fbInputParams
    {
        NV2080_CTRL_FB_INFO heapSize;
        NV2080_CTRL_FB_INFO reservedHeapSize;
        NV2080_CTRL_FB_INFO zeroFb;
    } fbParams;

    fbRegionInfoParams = portMemAllocNonPaged(sizeof(*fbRegionInfoParams));
    if (fbRegionInfoParams == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(fbRegionInfoParams, 0, sizeof(*fbRegionInfoParams));
    portMemSet(&fbInfoParams, 0, sizeof(fbInfoParams));
    portMemSet(&fbParams, 0, sizeof(fbParams));

    // Set up the list of parameters we are looking to extract
    fbParams.heapSize.index         = NV2080_CTRL_FB_INFO_INDEX_HEAP_SIZE;
    fbParams.reservedHeapSize.index = NV2080_CTRL_FB_INFO_INDEX_VISTA_RESERVED_HEAP_SIZE;
    fbParams.zeroFb.index           = NV2080_CTRL_FB_INFO_INDEX_FB_IS_BROKEN;

    fbInfoParams.fbInfoListSize = sizeof(fbParams) / sizeof(fbParams.heapSize);
    fbInfoParams.fbInfoList = NV_PTR_TO_NvP64(&fbParams);

    nvStatus = pRmApi->Control(pRmApi,
                               device->session->handle,
                               device->subhandle,
                               NV2080_CTRL_CMD_FB_GET_INFO,
                               &fbInfoParams,
                               sizeof(fbInfoParams));
    if (nvStatus != NV_OK)
        goto out;

    nvStatus = pRmApi->Control(pRmApi,
                               device->session->handle,
                               device->subhandle,
                               NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO,
                               fbRegionInfoParams,
                               sizeof(*fbRegionInfoParams));
    if (nvStatus != NV_OK)
        goto out;

    device->fbInfo.heapSize         = fbParams.heapSize.data;
    device->fbInfo.reservedHeapSize = fbParams.reservedHeapSize.data;
    device->fbInfo.bZeroFb          = (NvBool)fbParams.zeroFb.data;

    device->fbInfo.maxAllocatableAddress = 0;

    for (i = 0; i < fbRegionInfoParams->numFBRegions; ++i)
    {
        device->fbInfo.maxAllocatableAddress = NV_MAX(device->fbInfo.maxAllocatableAddress,
                                                      fbRegionInfoParams->fbRegion[i].limit);
    }

out:
    portMemFree(fbRegionInfoParams);
    return nvStatus;
}

// Return the PCIe link cap max speed associated with the given subdevice in
// megabytes per second.
static NV_STATUS getPCIELinkRateMBps(NvHandle hClient, NvHandle hSubDevice, NvU32 *pcieLinkRate)
{
    // PCI Express Base Specification: https://www.pcisig.com/specifications/pciexpress
    const NvU32 PCIE_1_ENCODING_RATIO_TOTAL = 10;
    const NvU32 PCIE_1_ENCODING_RATIO_EFFECTIVE = 8;
    const NvU32 PCIE_2_ENCODING_RATIO_TOTAL = 10;
    const NvU32 PCIE_2_ENCODING_RATIO_EFFECTIVE = 8;
    const NvU32 PCIE_3_ENCODING_RATIO_TOTAL = 130;
    const NvU32 PCIE_3_ENCODING_RATIO_EFFECTIVE = 128;
    const NvU32 PCIE_4_ENCODING_RATIO_TOTAL = 130;
    const NvU32 PCIE_4_ENCODING_RATIO_EFFECTIVE = 128;
    const NvU32 PCIE_5_ENCODING_RATIO_TOTAL = 130;
    const NvU32 PCIE_5_ENCODING_RATIO_EFFECTIVE = 128;
    const NvU32 PCIE_6_ENCODING_RATIO_TOTAL = 256;
    const NvU32 PCIE_6_ENCODING_RATIO_EFFECTIVE = 242;

    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NV2080_CTRL_BUS_INFO busInfo = {0};
    NV2080_CTRL_BUS_GET_INFO_PARAMS busInfoParams = {0};
    NvU32 linkRate = 0;
    NvU32 lanes;

    busInfo.index = NV2080_CTRL_BUS_INFO_INDEX_PCIE_GPU_LINK_CAPS;
    busInfoParams.busInfoListSize = 1;
    busInfoParams.busInfoList = NV_PTR_TO_NvP64(&busInfo);

    NV_STATUS status = pRmApi->Control(pRmApi,
                                       hClient,
                                       hSubDevice,
                                       NV2080_CTRL_CMD_BUS_GET_INFO,
                                       &busInfoParams,
                                       sizeof(busInfoParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                  __LINE__, nvstatusToString(status));
        return status;
    }

    lanes = DRF_VAL(2080, _CTRL_BUS_INFO, _PCIE_LINK_CAP_MAX_WIDTH, busInfo.data);

    // Bug 2606540: RM reports PCIe transfer rate in GT/s but labels it as Gbps
    switch (DRF_VAL(2080, _CTRL_BUS_INFO, _PCIE_LINK_CAP_MAX_SPEED, busInfo.data))
    {
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_2500MBPS:
            linkRate = ((2500 * lanes * PCIE_1_ENCODING_RATIO_EFFECTIVE)
                / PCIE_1_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_5000MBPS:
            linkRate = ((5000 * lanes * PCIE_2_ENCODING_RATIO_EFFECTIVE)
                / PCIE_2_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_8000MBPS:
            linkRate = ((8000 * lanes * PCIE_3_ENCODING_RATIO_EFFECTIVE)
                / PCIE_3_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_16000MBPS:
            linkRate = ((16000 * lanes * PCIE_4_ENCODING_RATIO_EFFECTIVE)
                / PCIE_4_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_32000MBPS:
            linkRate = ((32000 * lanes * PCIE_5_ENCODING_RATIO_EFFECTIVE)
                / PCIE_5_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_64000MBPS:
            linkRate = ((64000 * lanes * PCIE_6_ENCODING_RATIO_EFFECTIVE)
                / PCIE_6_ENCODING_RATIO_TOTAL) / 8;
            break;
        default:
            status = NV_ERR_INVALID_STATE;
            NV_PRINTF(LEVEL_ERROR, "Unknown PCIe speed\n");
    }

    *pcieLinkRate = linkRate;

    return status;
}
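//
// Worked example of the computation above for a hypothetical Gen3 x16 link:
// 8000 MT/s per lane * 16 lanes * 128/130 encoding efficiency, divided by
// 8 bits per byte:
//
//   (8000 * 16 * 128) / 130 / 8 = 15753 MB/s (integer arithmetic)
//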
NV_STATUS nvGpuOpsDeviceCreate(struct gpuSession *session,
                               const gpuInfo *pGpuInfo,
                               const NvProcessorUuid *gpuUuid,
                               struct gpuDevice **outDevice,
                               NvBool bCreateSmcPartition)
{
    NV_STATUS status;
    struct gpuDevice *device = NULL;
    NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    OBJGPU *pGpu;

    device = portMemAllocNonPaged(sizeof(*device));
    if (device == NULL)
        return NV_ERR_INSUFFICIENT_RESOURCES;
    portMemSet(device, 0, sizeof(*device));
    device->session = session;

    portMemCopy(&gpuIdInfoParams.gpuUuid, NV_UUID_LEN, gpuUuid->uuid, NV_UUID_LEN);
    gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             session->handle,
                             NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
                             &gpuIdInfoParams,
                             sizeof(gpuIdInfoParams));
    if (status != NV_OK)
        goto cleanup_device_obj;

    device->deviceInstance = gpuIdInfoParams.deviceInstance;
    device->subdeviceInstance = gpuIdInfoParams.subdeviceInstance;
    device->gpuId = gpuIdInfoParams.gpuId;

    portMemCopy(&device->info, sizeof(device->info), pGpuInfo, sizeof(*pGpuInfo));

    status = nvGpuOpsRmDeviceCreate(device);
    if (status != NV_OK)
        goto cleanup_device_obj;

    status = nvGpuOpsRmSubDeviceCreate(device);
    if (status != NV_OK)
        goto cleanup_rm_device;

    if (bCreateSmcPartition)
    {
        status = nvGpuOpsRmSmcPartitionCreate(device, pGpuInfo);
        if (status != NV_OK)
            goto cleanup_rm_subdevice;
    }

    // Create the work submission info mapping:
    //  * If SMC is disabled, create it for the device.
    //  * If SMC is enabled, create it only for SMC partitions.
    if (isDeviceVoltaPlus(device) && (!pGpuInfo->smcEnabled || bCreateSmcPartition))
    {
        status = gpuDeviceMapUsermodeRegion(device);
        if (status != NV_OK)
            goto cleanup_smc_partition;
    }

    status = gpuDeviceRmSubDeviceInitEcc(device);
    if (status != NV_OK)
        goto cleanup_subdevice_usermode;

    status = queryFbInfo(device);
    if (status != NV_OK)
        goto cleanup_ecc;

    device->isTccMode = NV_FALSE;

    // Non-TCC mode on Windows implies WDDM mode.
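    // (continued below: remaining device initialization and cleanup labels)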
1758 device->isWddmMode = !device->isTccMode; 1759 1760 status = findDeviceClasses(session->handle, 1761 device->handle, 1762 device->subhandle, 1763 &device->hostClass, 1764 &device->ceClass, 1765 &device->computeClass, 1766 &device->faultBufferClass, 1767 &device->accessCounterBufferClass, 1768 &device->sec2Class); 1769 if (status != NV_OK) 1770 goto cleanup_ecc; 1771 1772 mapInit(&device->kern2PhysDescrMap, portMemAllocatorGetGlobalNonPaged()); 1773 1774 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS); 1775 if (status != NV_OK) 1776 goto cleanup_ecc; 1777 status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL); 1778 rmapiLockRelease(); 1779 if (status != NV_OK) 1780 goto cleanup_ecc; 1781 1782 if (IS_VIRTUAL_WITH_HEAVY_SRIOV(pGpu)) 1783 { 1784 device->pPagingChannelRpcMutex = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged()); 1785 if (device->pPagingChannelRpcMutex == NULL) 1786 { 1787 status = NV_ERR_NO_MEMORY; 1788 goto cleanup_ecc; 1789 } 1790 } 1791 1792 *outDevice = device; 1793 return NV_OK; 1794 1795 cleanup_ecc: 1796 gpuDeviceRmSubDeviceDeinitEcc(device); 1797 cleanup_subdevice_usermode: 1798 gpuDeviceDestroyUsermodeRegion(device); 1799 cleanup_smc_partition: 1800 nvGpuOpsRmSmcPartitionDestroy(device); 1801 cleanup_rm_subdevice: 1802 nvGpuOpsDeviceDestroy(device); 1803 device = NULL; 1804 cleanup_rm_device: 1805 if (device) 1806 nvGpuOpsRmDeviceDestroy(device); 1807 cleanup_device_obj: 1808 portMemFree(device); 1809 return status; 1810 } 1811 1812 NV_STATUS nvGpuOpsDeviceDestroy(struct gpuDevice *device) 1813 { 1814 deviceDesc *rmDevice = device->rmDevice; 1815 subDeviceDesc *rmSubDevice = device->rmSubDevice; 1816 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 1817 1818 rmSubDevice->refCount--; 1819 1820 if (rmSubDevice->refCount == 0) 1821 { 1822 gpuDeviceDestroyUsermodeRegion(device); 1823 1824 gpuDeviceRmSubDeviceDeinitEcc(device); 1825 1826 nvGpuOpsRmSmcPartitionDestroy(device); 1827 1828 portSyncRwLockAcquireWrite(rmDevice->btreeLock); 1829 rmDevice->subDeviceCount--; 1830 deleteDescriptor(&rmDevice->subDevices, device->subdeviceInstance, (void**)&rmSubDevice); 1831 pRmApi->Free(pRmApi, device->session->handle, rmSubDevice->subDeviceHandle); 1832 portMemFree(rmSubDevice); 1833 portSyncRwLockReleaseWrite(rmDevice->btreeLock); 1834 1835 nvGpuOpsRmDeviceDestroy(device); 1836 } 1837 1838 mapDestroy(&device->kern2PhysDescrMap); 1839 1840 if (device->pPagingChannelRpcMutex != NULL) 1841 portSyncMutexDestroy(device->pPagingChannelRpcMutex); 1842 1843 portMemFree(device); 1844 return NV_OK; 1845 } 1846 1847 NV_STATUS nvGpuOpsOwnPageFaultIntr(struct gpuDevice *device, 1848 NvBool bOwnInterrupts) 1849 { 1850 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 1851 NV2080_CTRL_MC_CHANGE_REPLAYABLE_FAULT_OWNERSHIP_PARAMS changeParams = {0}; 1852 changeParams.bOwnedByRm = !bOwnInterrupts; 1853 return pRmApi->Control(pRmApi, 1854 device->session->handle, 1855 device->subhandle, 1856 NV2080_CTRL_CMD_MC_CHANGE_REPLAYABLE_FAULT_OWNERSHIP, 1857 &changeParams, 1858 sizeof(changeParams)); 1859 } 1860 1861 static NV_STATUS getAddressSpaceInfo(struct gpuAddressSpace *vaSpace, 1862 OBJGPU *pGpu, 1863 UvmGpuAddressSpaceInfo *vaSpaceInfo) 1864 { 1865 NV_STATUS status; 1866 NV0080_CTRL_DMA_ADV_SCHED_GET_VA_CAPS_PARAMS params = {0}; 1867 OBJVASPACE *pVAS = NULL; 1868 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 1869 struct gpuDevice *device = vaSpace->device; 1870 struct gpuSession *session = device->session; 1871 
subDeviceDesc *rmSubDevice = device->rmSubDevice;
1872
1873 params.hVASpace = vaSpace->handle;
1874 status = pRmApi->Control(pRmApi,
1875 session->handle,
1876 device->handle,
1877 NV0080_CTRL_CMD_DMA_ADV_SCHED_GET_VA_CAPS,
1878 &params,
1879 sizeof(params));
1880 if (status != NV_OK)
1881 return status;
1882
1883 vaSpaceInfo->bigPageSize = params.bigPageSize;
1884
1885 // TODO: Acquired because resserv expects RMAPI lock. Necessary?
1886 {
1887 RsClient *pClient;
1888 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
1889 if (status != NV_OK)
1890 return status;
1891
1892 status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
1893 if (status == NV_OK)
1894 {
1895 status = vaspaceGetByHandleOrDeviceDefault(pClient, device->handle, vaSpace->handle, &pVAS);
1896 }
1897 rmapiLockRelease();
1898 if (status != NV_OK)
1899 return status;
1900 }
1901
1902 vaSpaceInfo->atsEnabled = vaspaceIsAtsEnabled(pVAS);
1903
1904 if (isDeviceTuringPlus(vaSpace->device))
1905 {
1906 //
1907 // On Turing+ use the VIRTUAL_FUNCTION registers so this works in both
1908 // hosts and guests
1909 //
1910 void *bar0Mapping = gpuBar0BaseAddress(pGpu);
1911 vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)bar0Mapping + GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_TIME_0));
1912 vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)bar0Mapping + GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_TIME_1));
1913 }
1914 else if (isDeviceVoltaPlus(vaSpace->device))
1915 {
1916 NV_ASSERT(rmSubDevice->clientRegionMapping);
1917
1918 //
1919 // On Volta prefer USERMODE mappings for better passthrough
1920 // performance on some hypervisors (see CL23003453 for more details)
1921 //
1922 vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_TIME_0);
1923 vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_TIME_1);
1924 }
1925 else
1926 {
1927 void *bar0Mapping = gpuBar0BaseAddress(pGpu);
1928 vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)bar0Mapping + NV_PTIMER_TIME_0);
1929 vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)bar0Mapping + NV_PTIMER_TIME_1);
1930 }
1931
1932 if (IS_MIG_IN_USE(pGpu))
1933 {
1934 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = rmSubDevice->smcPartition.info;
1935 MIG_RESOURCE_ALLOCATION *pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;
1936
1937 vaSpaceInfo->maxSubctxCount = pResourceAllocation->veidCount;
1938 vaSpaceInfo->smcGpcCount = pResourceAllocation->gpcCount;
1939 }
1940 else
1941 {
1942 NV2080_CTRL_FIFO_GET_INFO_PARAMS *fifoGetInfoParams;
1943
1944 //
1945 // NV2080_CTRL_FIFO_GET_INFO_PARAMS takes over 2KB, so we use a heap
1946 // allocation
1947 //
1948 fifoGetInfoParams = portMemAllocNonPaged(sizeof(*fifoGetInfoParams));
1949 if (fifoGetInfoParams == NULL)
1950 return NV_ERR_NO_MEMORY;
1951
1952 fifoGetInfoParams->fifoInfoTblSize = 1;
1953 fifoGetInfoParams->fifoInfoTbl[0].index = NV2080_CTRL_FIFO_INFO_INDEX_MAX_SUBCONTEXT_PER_GROUP;
1954
1955 status = pRmApi->Control(pRmApi,
1956 session->handle,
1957 rmSubDevice->subDeviceHandle,
1958 NV2080_CTRL_CMD_FIFO_GET_INFO,
1959 fifoGetInfoParams,
1960 sizeof(*fifoGetInfoParams));
1961
1962 vaSpaceInfo->maxSubctxCount = fifoGetInfoParams->fifoInfoTbl[0].data;
1963
1964 portMemFree(fifoGetInfoParams);
1965
1966 if (status != NV_OK)
1967 return status;
1968 }
1969
1970 return NV_OK;
1971 }
1972
1973 // This function creates a new address space object of type FERMI_VASPACE_A.
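//
// Illustrative call pattern (sketch only, not part of the original source;
// the device pointer and error handling are assumed):
//
//     struct gpuAddressSpace *vaSpace = NULL;
//     UvmGpuAddressSpaceInfo vaSpaceInfo;
//
//     // Passing vaBase/vaSize as 0 lets RM pick the default VA range
//     // (NV_VASPACE_ALLOCATION_FLAGS_NONE below).
//     status = nvGpuOpsAddressSpaceCreate(device, 0, 0, &vaSpace, &vaSpaceInfo);
//     if (status == NV_OK)
//     {
//         // ... use vaSpace ...
//         nvGpuOpsAddressSpaceDestroy(vaSpace);
//     }
//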
1974 NV_STATUS nvGpuOpsAddressSpaceCreate(struct gpuDevice *device,
1975 NvU64 vaBase,
1976 NvU64 vaSize,
1977 struct gpuAddressSpace **vaSpace,
1978 UvmGpuAddressSpaceInfo *vaSpaceInfo)
1979 {
1980 NV_STATUS status;
1981 struct gpuAddressSpace *gpuVaSpace = NULL;
1982 OBJGPU *pGpu = NULL;
1983 NV_VASPACE_ALLOCATION_PARAMETERS vaParams = {0};
1984 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
1985 PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
1986
1987 gpuVaSpace = portMemAllocNonPaged(sizeof(*gpuVaSpace));
1988 if (gpuVaSpace == NULL)
1989 return NV_ERR_NO_MEMORY;
1990
1991 portMemSet(gpuVaSpace, 0, sizeof(*gpuVaSpace));
1992 gpuVaSpace->vaBase = vaBase;
1993 gpuVaSpace->vaSize = vaSize;
1994 gpuVaSpace->handle = NV01_NULL_OBJECT;
1995 gpuVaSpace->allocationsLock = portSyncRwLockCreate(pAlloc);
1996 gpuVaSpace->cpuMappingsLock = portSyncRwLockCreate(pAlloc);
1997 gpuVaSpace->physAllocationsLock = portSyncRwLockCreate(pAlloc);
1998
1999 *vaSpace = NULL;
2000 portMemSet(vaSpaceInfo, 0, sizeof(*vaSpaceInfo));
2001
2002 // Create a new vaSpace object
2003 vaParams.index = NV_VASPACE_ALLOCATION_INDEX_GPU_NEW;
2004 vaParams.vaBase = gpuVaSpace->vaBase;
2005 vaParams.vaSize = gpuVaSpace->vaSize;
2006 vaParams.flags = gpuVaSpace->vaSize ?
2007 NV_VASPACE_ALLOCATION_FLAGS_SHARED_MANAGEMENT :
2008 NV_VASPACE_ALLOCATION_FLAGS_NONE;
2009
2010 // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
2011 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
2012 if (status != NV_OK)
2013 goto cleanup_vaspace;
2014 status = CliSetGpuContext(device->session->handle, device->handle, &pGpu, NULL);
2015 rmapiLockRelease();
2016 if (status != NV_OK)
2017 {
2018 goto cleanup_vaspace;
2019 }
2020
2021 status = pRmApi->Alloc(pRmApi,
2022 device->session->handle,
2023 device->handle,
2024 &gpuVaSpace->handle, FERMI_VASPACE_A,
2025 &vaParams,
2026 sizeof(vaParams));
2027 if (status != NV_OK)
2028 {
2029 goto cleanup_struct;
2030 }
2031
2032 // If vaBase and vaSize were not provided by the caller, RM has filled them in now
2033 gpuVaSpace->vaBase = vaParams.vaBase;
2034 gpuVaSpace->vaSize = vaParams.vaSize;
2035 gpuVaSpace->device = device;
2036
2037 status = getAddressSpaceInfo(gpuVaSpace, pGpu, vaSpaceInfo);
2038 if (status != NV_OK)
2039 {
2040 goto cleanup_vaspace;
2041 }
2042
2043 *vaSpace = gpuVaSpace;
2044 return status;
2045
2046 cleanup_vaspace:
2047 pRmApi->Free(pRmApi, device->session->handle, gpuVaSpace->handle);
2048
2049 cleanup_struct:
2050 portSyncRwLockDestroy(gpuVaSpace->allocationsLock);
2051 portSyncRwLockDestroy(gpuVaSpace->cpuMappingsLock);
2052 portSyncRwLockDestroy(gpuVaSpace->physAllocationsLock);
2053 portMemFree(gpuVaSpace);
2054 return status;
2055 }
2056
2057 NV_STATUS nvGpuOpsDupAddressSpace(struct gpuDevice *device,
2058 NvHandle hUserClient,
2059 NvHandle hUserVASpace,
2060 struct gpuAddressSpace **vaSpace,
2061 UvmGpuAddressSpaceInfo *vaSpaceInfo)
2062 {
2063 NV_STATUS status = NV_OK;
2064 struct gpuAddressSpace *gpuVaSpace = NULL;
2065 struct gpuSession *session = device->session;
2066 OBJVASPACE *pVAS = NULL;
2067 OBJGPU *pGpu = NULL;
2068 RsResourceRef *pVaSpaceRef;
2069 RsResourceRef *pDeviceRef;
2070 Device *pDevice = NULL;
2071 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
2072 PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
2073
2074 *vaSpace = NULL;
2075 portMemSet(vaSpaceInfo, 0, sizeof(*vaSpaceInfo));
2076
2077 // TODO - Move this check to RMDupObject later.
2078 // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
2079 // Find the device associated with the hUserVASpace and verify that the UUID belongs to it.
2080 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
2081 if (status != NV_OK)
2082 return status;
2083 status = serverutilGetResourceRef(hUserClient, hUserVASpace, &pVaSpaceRef);
2084 rmapiLockRelease();
2085 if (status != NV_OK)
2086 return status;
2087
2088 if (!dynamicCast(pVaSpaceRef->pResource, VaSpaceApi))
2089 return NV_ERR_INVALID_OBJECT;
2090
2091 // The parent must be valid and a device if this is a VA space handle
2092 // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
2093 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
2094 if (status != NV_OK)
2095 return status;
2096 status = serverutilGetResourceRef(hUserClient, pVaSpaceRef->pParentRef->hResource, &pDeviceRef);
2097 rmapiLockRelease();
2098 NV_ASSERT(status == NV_OK);
2099
2100 pDevice = dynamicCast(pDeviceRef->pResource, Device);
2101 NV_ASSERT(pDevice != NULL);
2102
2103 if (pDevice->deviceInst != device->deviceInstance)
2104 return NV_ERR_OTHER_DEVICE_FOUND;
2105
2106 gpuVaSpace = portMemAllocNonPaged(sizeof(*gpuVaSpace));
2107 if (gpuVaSpace == NULL)
2108 return NV_ERR_NO_MEMORY;
2109
2110 portMemSet(gpuVaSpace, 0, sizeof(*gpuVaSpace));
2111
2112 gpuVaSpace->device = device;
2113 gpuVaSpace->allocationsLock = portSyncRwLockCreate(pAlloc);
2114 gpuVaSpace->cpuMappingsLock = portSyncRwLockCreate(pAlloc);
2115 gpuVaSpace->physAllocationsLock = portSyncRwLockCreate(pAlloc);
2116
2117 // Dup the VA space
2118 gpuVaSpace->handle = NV01_NULL_OBJECT;
2119 status = pRmApi->DupObject(pRmApi,
2120 session->handle,
2121 device->handle,
2122 &gpuVaSpace->handle,
2123 hUserClient,
2124 hUserVASpace,
2125 NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
2126 if (status != NV_OK)
2127 goto cleanup_vaspace;
2128
2129 // TODO: Acquired because these functions expect RMAPI lock. Necessary?
2130 {
2131 RsClient *pClient;
2132 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
2133 if (status != NV_OK)
2134 goto cleanup_dup_vaspace;
2135
2136 status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
2137 if (status == NV_OK)
2138 {
2139 status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
2140 if (status == NV_OK)
2141 {
2142 status = vaspaceGetByHandleOrDeviceDefault(pClient, device->handle, gpuVaSpace->handle, &pVAS);
2143 }
2144 }
2145 rmapiLockRelease();
2146 if (status != NV_OK)
2147 goto cleanup_dup_vaspace;
2148 }
2149
2150 if (!vaspaceIsExternallyOwned(pVAS))
2151 {
2152 status = NV_ERR_INVALID_FLAGS;
2153 goto cleanup_dup_vaspace;
2154 }
2155
2156 status = getAddressSpaceInfo(gpuVaSpace, pGpu, vaSpaceInfo);
2157 if (status != NV_OK)
2158 goto cleanup_dup_vaspace;
2159
2160 *vaSpace = gpuVaSpace;
2161
2162 return NV_OK;
2163
2164 cleanup_dup_vaspace:
2165 pRmApi->Free(pRmApi, session->handle, gpuVaSpace->handle);
2166 cleanup_vaspace:
2167 portSyncRwLockDestroy(gpuVaSpace->allocationsLock);
2168 portSyncRwLockDestroy(gpuVaSpace->cpuMappingsLock);
2169 portSyncRwLockDestroy(gpuVaSpace->physAllocationsLock);
2170 portMemFree(gpuVaSpace);
2171 return status;
2172 }
2173
2174 // Get the NVLink connection status for the given device. On success, the
2175 // caller is responsible for freeing the memory.
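//
// Illustrative call pattern (sketch only, not part of the original source;
// the handle names are assumed):
//
//     NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus = NULL;
//
//     status = allocNvlinkStatus(hClient, hSubDevice, &nvlinkStatus);
//     if (status == NV_OK)
//     {
//         // ... inspect nvlinkStatus->enabledLinkMask, linkInfo[], etc. ...
//         portMemFree(nvlinkStatus);
//     }
//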
2176 static NV_STATUS allocNvlinkStatus(NvHandle hClient,
2177 NvHandle hSubDevice,
2178 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS **nvlinkStatusOut)
2179 {
2180 NV_STATUS status;
2181 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus;
2182 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
2183
2184 *nvlinkStatusOut = NULL;
2185
2186 nvlinkStatus = portMemAllocNonPaged(sizeof(*nvlinkStatus));
2187 if (nvlinkStatus == NULL)
2188 return NV_ERR_NO_MEMORY;
2189
2190 portMemSet(nvlinkStatus, 0, sizeof(*nvlinkStatus));
2191 status = pRmApi->Control(pRmApi,
2192 hClient,
2193 hSubDevice,
2194 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS,
2195 nvlinkStatus,
2196 sizeof(*nvlinkStatus));
2197 if (status == NV_ERR_NOT_SUPPORTED)
2198 {
2199 portMemSet(nvlinkStatus, 0, sizeof(*nvlinkStatus));
2200 }
2201 else if (status != NV_OK)
2202 {
2203 portMemFree(nvlinkStatus);
2204 NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
2205 __LINE__, nvstatusToString(status));
2206 return status;
2207 }
2208
2209 *nvlinkStatusOut = nvlinkStatus;
2210
2211 return NV_OK;
2212 }
2213
2214 // If the given NvLink connection has a GPU device as an endpoint, return the
2215 // version of the NvLink connection with that GPU, and the maximum
2216 // unidirectional bandwidth in megabytes per second. Otherwise, return
2217 // NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
2218 static NvU32 getNvlinkConnectionToGpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
2219 OBJGPU *pGpu,
2220 NvU32 *linkBandwidthMBps)
2221 {
2222 NvU32 i;
2223
2224 NvU32 version = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
2225 NvU32 domain = gpuGetDomain(pGpu);
2226 NvU16 bus = gpuGetBus(pGpu);
2227 NvU16 device = gpuGetDevice(pGpu);
2228 NvU32 bwMBps = 0;
2229
2230 for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
2231 {
2232 if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
2233 continue;
2234
2235 if (!nvlinkStatus->linkInfo[i].connected)
2236 continue;
2237
2238 // Skip loopback/loopout connections
2239 if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
2240 continue;
2241
2242 if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
2243 {
2244 if ((nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceIdFlags &
2245 NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_PCI) == 0)
2246 {
2247 NV_PRINTF(LEVEL_ERROR, "No PCI information for GPU.\n");
2248 continue;
2249 }
2250
2251 if ((domain == nvlinkStatus->linkInfo[i].remoteDeviceInfo.domain) &&
2252 (bus == nvlinkStatus->linkInfo[i].remoteDeviceInfo.bus) &&
2253 (device == nvlinkStatus->linkInfo[i].remoteDeviceInfo.device) &&
2254 (pGpu->idInfo.PCIDeviceID == nvlinkStatus->linkInfo[i].remoteDeviceInfo.pciDeviceId))
2255 {
2256 NvU32 capsTbl = nvlinkStatus->linkInfo[i].capsTbl;
2257
2258 NV_ASSERT(NV2080_CTRL_NVLINK_GET_CAP(((NvU8 *)&capsTbl), NV2080_CTRL_NVLINK_CAPS_P2P_ATOMICS));
2259
2260 if (bwMBps == 0)
2261 version = nvlinkStatus->linkInfo[i].nvlinkVersion;
2262
2263 bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
2264 NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
2265 }
2266 }
2267 }
2268
2269 *linkBandwidthMBps = bwMBps;
2270 if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
2271 NV_ASSERT(*linkBandwidthMBps == 0);
2272
2273 return version;
2274 }
2275
2276 static NV_STATUS getC2CConnectionToCpu(NvHandle hClient,
2277 NvHandle hSubDevice,
2278 NvBool *connectedToCpu,
2279 NvU32 *linkBandwidthMBps)
2280 {
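//
// Annotation (not from the original source): the bandwidth reported below
// is the aggregate across all C2C links, params.nrLinks * params.perLinkBwMBps.
// With hypothetical values nrLinks == 4 and perLinkBwMBps == 25000, this
// would report 100000 MBps.
//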
2281 NV2080_CTRL_CMD_BUS_GET_C2C_INFO_PARAMS params = {0};
2282 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
2283
2284 *connectedToCpu = NV_FALSE;
2285 *linkBandwidthMBps = 0;
2286
2287 NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
2288 hClient,
2289 hSubDevice,
2290 NV2080_CTRL_CMD_BUS_GET_C2C_INFO,
2291 &params,
2292 sizeof(params)));
2293
2294 if (params.bIsLinkUp == NV_TRUE &&
2295 (params.remoteType == NV2080_CTRL_BUS_GET_C2C_INFO_REMOTE_TYPE_CPU))
2296 {
2297 *connectedToCpu = NV_TRUE;
2298 *linkBandwidthMBps = params.nrLinks * params.perLinkBwMBps;
2299 }
2300
2301 return NV_OK;
2302 }
2303
2304 // If the given NvLink connection has an NPU device as an endpoint, return the
2305 // version of the NvLink connection with that NPU, and the maximum
2306 // unidirectional bandwidth in megabytes per second. Otherwise, return
2307 // NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
2308 static NvU32 getNvlinkConnectionToNpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
2309 NvBool *atomicSupported,
2310 NvU32 *linkBandwidthMBps)
2311 {
2312 NvU32 i;
2313 NvU32 bwMBps = 0;
2314 NvU32 version = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
2315
2316 *atomicSupported = NV_FALSE;
2317
2318 for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
2319 {
2320 if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
2321 continue;
2322
2323 if (!nvlinkStatus->linkInfo[i].connected)
2324 continue;
2325
2326 // Skip loopback/loopout connections
2327 if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
2328 continue;
2329
2330 if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_NPU)
2331 {
2332 NvU32 capsTbl = nvlinkStatus->linkInfo[i].capsTbl;
2333 NvBool atomicCap = !!NV2080_CTRL_NVLINK_GET_CAP(((NvU8 *)&capsTbl), NV2080_CTRL_NVLINK_CAPS_SYSMEM_ATOMICS);
2334
2335 if (bwMBps == 0)
2336 {
2337 *atomicSupported = atomicCap;
2338 version = nvlinkStatus->linkInfo[i].nvlinkVersion;
2339 }
2340 bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
2341 NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
2342 NV_ASSERT(*atomicSupported == atomicCap);
2343 }
2344 }
2345
2346 *linkBandwidthMBps = bwMBps;
2347 if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
2348 NV_ASSERT(*linkBandwidthMBps == 0);
2349
2350 return version;
2351 }
2352
2353 // If the given NvLink connection has a switch as an endpoint, return the
2354 // version of the NvLink connection with that switch, and the maximum
2355 // unidirectional bandwidth in megabytes per second. Otherwise, return
2356 // NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
2357 static NvU32 getNvlinkConnectionToSwitch(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus, 2358 NvU32 *linkBandwidthMBps) 2359 { 2360 NvU32 i; 2361 NvU32 bwMBps = 0; 2362 NvU32 version = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID; 2363 2364 for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i) 2365 { 2366 if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0) 2367 continue; 2368 2369 if (!nvlinkStatus->linkInfo[i].connected) 2370 continue; 2371 2372 // Skip loopback/loopout connections 2373 if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE) 2374 continue; 2375 2376 if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_SWITCH) 2377 { 2378 if (bwMBps == 0) 2379 version = nvlinkStatus->linkInfo[i].nvlinkVersion; 2380 2381 bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps; 2382 NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion); 2383 } 2384 } 2385 2386 *linkBandwidthMBps = bwMBps; 2387 if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID) 2388 NV_ASSERT(*linkBandwidthMBps == 0); 2389 2390 return version; 2391 } 2392 2393 // Compute whether the non-peer GPUs with the given NVLink connections can 2394 // communicate through P9 NPUs 2395 static NV_STATUS gpusHaveNpuNvlink(NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1, 2396 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2, 2397 NvU32 *nvlinkVersion) 2398 { 2399 NvU32 nvlinkVersion1, nvlinkVersion2; 2400 NvU32 tmpLinkBandwidthMBps; 2401 NvBool atomicSupported1, atomicSupported2; 2402 2403 *nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID; 2404 2405 nvlinkVersion1 = getNvlinkConnectionToNpu(nvlinkStatus1, 2406 &atomicSupported1, 2407 &tmpLinkBandwidthMBps); 2408 nvlinkVersion2 = getNvlinkConnectionToNpu(nvlinkStatus2, 2409 &atomicSupported2, 2410 &tmpLinkBandwidthMBps); 2411 2412 if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID || 2413 nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID) 2414 return NV_OK; 2415 2416 // Non-peer GPU communication over NPU is only supported on NVLink 2.0 or 2417 // greater 2418 if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 || 2419 nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0) 2420 { 2421 // NVLink1 devices cannot be mixed with other versions. NVLink3 2422 // supports mixing NVLink2 and NVLink3 devices 2423 NV_ASSERT(nvlinkVersion1 == nvlinkVersion2); 2424 return NV_OK; 2425 } 2426 2427 NV_ASSERT(atomicSupported1); 2428 NV_ASSERT(atomicSupported2); 2429 2430 // We do not explore the whole connectivity graph. We assume that NPUs 2431 // connected to NVLink2 (and greater) can forward memory requests so that 2432 // if GPU A is connected to NPU M and GPU B is connected to NPU N, A can 2433 // access B. 
2434 *nvlinkVersion = NV_MIN(nvlinkVersion1, nvlinkVersion2); 2435 2436 return NV_OK; 2437 } 2438 2439 static NV_STATUS rmSystemP2PCapsControl(struct gpuDevice *device1, 2440 struct gpuDevice *device2, 2441 NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams) 2442 { 2443 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 2444 2445 portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams)); 2446 p2pCapsParams->gpuIds[0] = device1->gpuId; 2447 p2pCapsParams->gpuIds[1] = device2->gpuId; 2448 p2pCapsParams->gpuCount = 2; 2449 2450 NvHandle handle = device1->session->handle; 2451 NV_STATUS status = pRmApi->Control(pRmApi, 2452 handle, 2453 handle, 2454 NV0000_CTRL_CMD_SYSTEM_GET_P2P_CAPS_V2, 2455 p2pCapsParams, 2456 sizeof(*p2pCapsParams)); 2457 if (status != NV_OK) 2458 { 2459 NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__, 2460 __LINE__, nvstatusToString(status)); 2461 } 2462 2463 return status; 2464 } 2465 2466 // Get R/W/A access capabilities and the link type between the two given GPUs 2467 static NV_STATUS getSystemP2PCaps(struct gpuDevice *device1, 2468 struct gpuDevice *device2, 2469 struct systemP2PCaps *p2pCaps) 2470 { 2471 NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams = NULL; 2472 NV_STATUS status = NV_OK; 2473 2474 p2pCapsParams = portMemAllocNonPaged(sizeof(*p2pCapsParams)); 2475 if (p2pCapsParams == NULL) 2476 { 2477 status = NV_ERR_NO_MEMORY; 2478 goto done; 2479 } 2480 2481 status = rmSystemP2PCapsControl(device1, device2, p2pCapsParams); 2482 if (status != NV_OK) 2483 goto done; 2484 2485 portMemSet(p2pCaps, 0, sizeof(*p2pCaps)); 2486 p2pCaps->peerIds[0] = p2pCapsParams->busPeerIds[0 * 2 + 1]; 2487 p2pCaps->peerIds[1] = p2pCapsParams->busPeerIds[1 * 2 + 0]; 2488 p2pCaps->nvlinkSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_NVLINK_SUPPORTED, p2pCapsParams->p2pCaps); 2489 p2pCaps->atomicSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_ATOMICS_SUPPORTED, p2pCapsParams->p2pCaps); 2490 p2pCaps->indirectAccessSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_INDIRECT_NVLINK_SUPPORTED, 2491 p2pCapsParams->p2pCaps); 2492 2493 // TODO: Bug 1768805: Check both reads and writes since RM seems to be 2494 // currently incorrectly reporting just the P2P write cap on some 2495 // systems that cannot support P2P at all. See the bug for more 2496 // details. 
2497 if (REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_READS_SUPPORTED, p2pCapsParams->p2pCaps) && 2498 REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_WRITES_SUPPORTED, p2pCapsParams->p2pCaps)) 2499 { 2500 NV_ASSERT(!p2pCaps->indirectAccessSupported); 2501 2502 p2pCaps->accessSupported = NV_TRUE; 2503 } 2504 2505 if (p2pCaps->nvlinkSupported || p2pCaps->indirectAccessSupported) 2506 { 2507 // Exactly one CE is expected to be recommended for transfers between 2508 // NvLink peers 2509 NV_ASSERT(nvPopCount32(p2pCapsParams->p2pOptimalWriteCEs) == 1); 2510 2511 // Query the write mask only; UVM has no use for the read mask 2512 p2pCaps->optimalNvlinkWriteCEs[0] = BIT_IDX_32(p2pCapsParams->p2pOptimalWriteCEs); 2513 2514 // Query the P2P capabilities of device2->device1, which may be 2515 // different from those of device1->device2 2516 status = rmSystemP2PCapsControl(device2, device1, p2pCapsParams); 2517 if (status != NV_OK) 2518 goto done; 2519 2520 NV_ASSERT(nvPopCount32(p2pCapsParams->p2pOptimalWriteCEs) == 1); 2521 2522 p2pCaps->optimalNvlinkWriteCEs[1] = BIT_IDX_32(p2pCapsParams->p2pOptimalWriteCEs); 2523 } 2524 2525 done: 2526 portMemFree(p2pCapsParams); 2527 return status; 2528 } 2529 2530 // Return the NVLink P2P capabilities of the peer GPUs with the given devices 2531 static NV_STATUS getNvlinkP2PCaps(struct gpuDevice *device1, 2532 struct gpuDevice *device2, 2533 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1, 2534 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2, 2535 NvU32 *nvlinkVersion, 2536 NvU32 *linkBandwidthMBps) 2537 { 2538 NvU32 nvlinkVersion1, nvlinkVersion2; 2539 NvU32 linkBandwidthMBps1, linkBandwidthMBps2; 2540 2541 *nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID; 2542 2543 if (device1->info.connectedToSwitch && device2->info.connectedToSwitch) 2544 { 2545 nvlinkVersion1 = getNvlinkConnectionToSwitch(nvlinkStatus1, 2546 &linkBandwidthMBps1); 2547 nvlinkVersion2 = getNvlinkConnectionToSwitch(nvlinkStatus2, 2548 &linkBandwidthMBps2); 2549 } 2550 else 2551 { 2552 OBJGPU *pGpu1, *pGpu2; 2553 2554 pGpu1 = gpumgrGetGpuFromId(device1->gpuId); 2555 if (!pGpu1) 2556 return NV_ERR_OBJECT_NOT_FOUND; 2557 2558 pGpu2 = gpumgrGetGpuFromId(device2->gpuId); 2559 if (!pGpu2) 2560 return NV_ERR_OBJECT_NOT_FOUND; 2561 2562 nvlinkVersion1 = getNvlinkConnectionToGpu(nvlinkStatus1, 2563 pGpu2, 2564 &linkBandwidthMBps1); 2565 nvlinkVersion2 = getNvlinkConnectionToGpu(nvlinkStatus2, 2566 pGpu1, 2567 &linkBandwidthMBps2); 2568 } 2569 2570 if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID || 2571 nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID) 2572 { 2573 *linkBandwidthMBps = 0; 2574 return NV_OK; 2575 } 2576 2577 // NVLink1 devices cannot be mixed with other versions. NVLink3 supports 2578 // mixing NVLink2 and NVLink3 devices. NVLink4 devices cannot be mixed with 2579 // prior NVLink versions. 
2580 if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
2581 nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
2582 nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0 ||
2583 nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0)
2584 {
2585 NV_ASSERT(nvlinkVersion1 == nvlinkVersion2);
2586 NV_ASSERT(linkBandwidthMBps1 == linkBandwidthMBps2);
2587 }
2588
2589 *nvlinkVersion = NV_MIN(nvlinkVersion1, nvlinkVersion2);
2590 *linkBandwidthMBps = NV_MIN(linkBandwidthMBps1, linkBandwidthMBps2);
2591
2592 return NV_OK;
2593 }
2594
2595 NV_STATUS nvGpuOpsGetP2PCaps(struct gpuDevice *device1,
2596 struct gpuDevice *device2,
2597 getP2PCapsParams *p2pCapsParams)
2598 {
2599 NV_STATUS status = NV_OK;
2600 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1 = NULL;
2601 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2 = NULL;
2602 struct systemP2PCaps p2pCaps;
2603
2604 if (!device1 || !device2)
2605 return NV_ERR_INVALID_ARGUMENT;
2606
2607 if (device1->session != device2->session)
2608 return NV_ERR_INVALID_ARGUMENT;
2609
2610 if (!p2pCapsParams)
2611 return NV_ERR_INVALID_ARGUMENT;
2612
2613 status = allocNvlinkStatus(device1->session->handle,
2614 device1->subhandle,
2615 &nvlinkStatus1);
2616 if (status != NV_OK)
2617 goto cleanup;
2618
2619 status = allocNvlinkStatus(device2->session->handle,
2620 device2->subhandle,
2621 &nvlinkStatus2);
2622 if (status != NV_OK)
2623 goto cleanup;
2624
2625 portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams));
2626 p2pCapsParams->peerIds[0] = (NvU32)-1;
2627 p2pCapsParams->peerIds[1] = (NvU32)-1;
2628 p2pCapsParams->p2pLink = UVM_LINK_TYPE_NONE;
2629 p2pCapsParams->indirectAccess = NV_FALSE;
2630
2631 status = getSystemP2PCaps(device1, device2, &p2pCaps);
2632 if (status != NV_OK)
2633 goto cleanup;
2634
2635 if (p2pCaps.indirectAccessSupported)
2636 {
2637 NvU32 nvlinkVersion;
2638 NvU32 p2pLink;
2639
2640 status = gpusHaveNpuNvlink(nvlinkStatus1,
2641 nvlinkStatus2,
2642 &nvlinkVersion);
2643 if (status != NV_OK)
2644 goto cleanup;
2645
2646 p2pLink = rmControlToUvmNvlinkVersion(nvlinkVersion);
2647
2648 NV_ASSERT(p2pLink >= UVM_LINK_TYPE_NVLINK_2);
2649
2650 p2pCapsParams->indirectAccess = NV_TRUE;
2651 p2pCapsParams->p2pLink = p2pLink;
2652 p2pCapsParams->optimalNvlinkWriteCEs[0] = p2pCaps.optimalNvlinkWriteCEs[0];
2653 p2pCapsParams->optimalNvlinkWriteCEs[1] = p2pCaps.optimalNvlinkWriteCEs[1];
2654
2655 // Link bandwidth is not provided because the intermediate link rate could
2656 // vary widely with system topology and current load, making any reported
2657 // bandwidth unreliable.
2658 p2pCapsParams->totalLinkLineRateMBps = 0; 2659 } 2660 else if (p2pCaps.accessSupported) 2661 { 2662 p2pCapsParams->peerIds[0] = p2pCaps.peerIds[0]; 2663 p2pCapsParams->peerIds[1] = p2pCaps.peerIds[1]; 2664 2665 if (p2pCaps.nvlinkSupported) 2666 { 2667 NvU32 nvlinkVersion; 2668 NvU32 linkBandwidthMBps; 2669 2670 NV_ASSERT(p2pCaps.atomicSupported); 2671 2672 status = getNvlinkP2PCaps(device1, 2673 device2, 2674 nvlinkStatus1, 2675 nvlinkStatus2, 2676 &nvlinkVersion, 2677 &linkBandwidthMBps); 2678 if (status != NV_OK) 2679 goto cleanup; 2680 2681 p2pCapsParams->p2pLink = rmControlToUvmNvlinkVersion(nvlinkVersion); 2682 p2pCapsParams->optimalNvlinkWriteCEs[0] = p2pCaps.optimalNvlinkWriteCEs[0]; 2683 p2pCapsParams->optimalNvlinkWriteCEs[1] = p2pCaps.optimalNvlinkWriteCEs[1]; 2684 2685 NV_ASSERT(p2pCapsParams->p2pLink != UVM_LINK_TYPE_NONE); 2686 NV_ASSERT(linkBandwidthMBps != 0); 2687 2688 p2pCapsParams->totalLinkLineRateMBps = linkBandwidthMBps; 2689 } 2690 else 2691 { 2692 NvU32 linkBandwidthMBps1, linkBandwidthMBps2; 2693 2694 status = getPCIELinkRateMBps(device1->session->handle, 2695 device1->subhandle, 2696 &linkBandwidthMBps1); 2697 if (status != NV_OK) 2698 goto cleanup; 2699 2700 status = getPCIELinkRateMBps(device2->session->handle, 2701 device2->subhandle, 2702 &linkBandwidthMBps2); 2703 if (status != NV_OK) 2704 goto cleanup; 2705 2706 p2pCapsParams->p2pLink = UVM_LINK_TYPE_PCIE; 2707 p2pCapsParams->totalLinkLineRateMBps = NV_MIN(linkBandwidthMBps1, linkBandwidthMBps2); 2708 } 2709 } 2710 2711 cleanup: 2712 portMemFree(nvlinkStatus1); 2713 portMemFree(nvlinkStatus2); 2714 2715 return status; 2716 } 2717 2718 static NV_STATUS nvGpuOpsGetExternalAllocP2pInfo(struct gpuSession *session, 2719 NvU32 memOwnerGpuId, 2720 NvU32 gpuId, 2721 NvBool *isPeerSupported, 2722 NvU32 *peerId) 2723 { 2724 NV_STATUS status = NV_OK; 2725 NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams = NULL; 2726 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 2727 2728 NV_ASSERT(gpuId != memOwnerGpuId); 2729 2730 p2pCapsParams = portMemAllocNonPaged(sizeof(*p2pCapsParams)); 2731 if (p2pCapsParams == NULL) 2732 { 2733 status = NV_ERR_NO_MEMORY; 2734 goto done; 2735 } 2736 2737 portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams)); 2738 p2pCapsParams->gpuIds[0] = gpuId; 2739 p2pCapsParams->gpuIds[1] = memOwnerGpuId; 2740 p2pCapsParams->gpuCount = 2; 2741 2742 status = pRmApi->Control(pRmApi, 2743 session->handle, 2744 session->handle, 2745 NV0000_CTRL_CMD_SYSTEM_GET_P2P_CAPS_V2, 2746 p2pCapsParams, 2747 sizeof(*p2pCapsParams)); 2748 if (status != NV_OK) 2749 goto done; 2750 2751 *isPeerSupported = 2752 (REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_WRITES_SUPPORTED, p2pCapsParams->p2pCaps) && 2753 REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_READS_SUPPORTED, p2pCapsParams->p2pCaps)); 2754 2755 *peerId = p2pCapsParams->busPeerIds[0 * 2 + 1]; 2756 2757 done: 2758 portMemFree(p2pCapsParams); 2759 return status; 2760 } 2761 2762 static GMMU_APERTURE nvGpuOpsGetExternalAllocAperture(PMEMORY_DESCRIPTOR pMemDesc, 2763 NvBool isIndirectPeerSupported, 2764 NvBool isPeerSupported) 2765 { 2766 // Don't support both direct and indirect peers 2767 NV_ASSERT(!(isIndirectPeerSupported && isPeerSupported)); 2768 2769 // Get the aperture 2770 if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM) 2771 { 2772 if (isIndirectPeerSupported) 2773 return GMMU_APERTURE_SYS_COH; 2774 2775 if (isPeerSupported) 2776 return GMMU_APERTURE_PEER; 2777 2778 return GMMU_APERTURE_VIDEO; 2779 } 2780 else if ( 2781 
(memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC) || 2782 (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_V2)) 2783 { 2784 return GMMU_APERTURE_PEER; 2785 } 2786 else 2787 { 2788 return GMMU_APERTURE_SYS_COH; 2789 } 2790 } 2791 2792 static NvBool nvGpuOpsGetExternalAllocVolatility(PMEMORY_DESCRIPTOR pMemDesc, 2793 GMMU_APERTURE aperture, 2794 NvBool isIndirectPeerSupported, 2795 UvmRmGpuCachingType cachingType) 2796 { 2797 if (cachingType == UvmRmGpuCachingTypeDefault) 2798 { 2799 if (aperture == GMMU_APERTURE_PEER || isIndirectPeerSupported) 2800 return (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED) ? NV_TRUE : NV_FALSE; 2801 else 2802 return (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED) ? NV_TRUE : NV_FALSE; 2803 } 2804 else if (cachingType == UvmRmGpuCachingTypeForceUncached) 2805 { 2806 return NV_TRUE; 2807 } 2808 else 2809 { 2810 return NV_FALSE; 2811 } 2812 } 2813 2814 static NV_STATUS nvGpuOpsGetExternalAllocMappingAttribute(UvmRmGpuMappingType mappingType, 2815 PMEMORY_DESCRIPTOR pMemDesc, 2816 NvBool *readOnly, 2817 NvBool *atomic) 2818 { 2819 *readOnly = NV_FALSE; 2820 *atomic = NV_FALSE; 2821 2822 if (memdescGetFlag(pMemDesc, MEMDESC_FLAGS_DEVICE_READ_ONLY)) 2823 { 2824 if (mappingType != UvmRmGpuMappingTypeDefault && 2825 mappingType != UvmRmGpuMappingTypeReadOnly) 2826 return NV_ERR_INVALID_ACCESS_TYPE; 2827 2828 *readOnly = NV_TRUE; 2829 *atomic = NV_FALSE; 2830 } 2831 else 2832 { 2833 *readOnly = (mappingType == UvmRmGpuMappingTypeReadOnly); 2834 *atomic = (mappingType == UvmRmGpuMappingTypeDefault || 2835 mappingType == UvmRmGpuMappingTypeReadWriteAtomic); 2836 } 2837 2838 return NV_OK; 2839 } 2840 2841 static NV_STATUS nvGpuOpsGetPteKind(OBJGPU *pMappingGpu, 2842 MemoryManager *pMemoryManager, 2843 PMEMORY_DESCRIPTOR pMemDesc, 2844 Memory *pMemory, 2845 gpuExternalMappingInfo *pGpuExternalMappingInfo, 2846 NvU32 *newKind) 2847 { 2848 NV_STATUS status = NV_OK; 2849 FB_ALLOC_PAGE_FORMAT fbAllocPageFormat = {0}; 2850 NvU32 ctagId; 2851 2852 if (pGpuExternalMappingInfo->compressionType == UvmRmGpuCompressionTypeEnabledNoPlc) 2853 { 2854 if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, *newKind)) 2855 { 2856 status = memmgrChooseKind_HAL(pMappingGpu, 2857 pMemoryManager, 2858 &fbAllocPageFormat, 2859 NVOS32_ATTR_COMPR_DISABLE_PLC_ANY, 2860 newKind); 2861 } 2862 else 2863 { 2864 status = NV_ERR_INVALID_ARGUMENT; 2865 } 2866 2867 if (status != NV_OK) 2868 return status; 2869 } 2870 2871 if (pGpuExternalMappingInfo->formatType != UvmRmGpuFormatTypeDefault) 2872 { 2873 NV_ASSERT(pGpuExternalMappingInfo->elementBits != UvmRmGpuFormatElementBitsDefault); 2874 2875 fbAllocPageFormat.attr = pMemory->Attr; 2876 fbAllocPageFormat.attr2 = pMemory->Attr2; 2877 fbAllocPageFormat.flags = pMemory->Flags; 2878 fbAllocPageFormat.type = pMemory->Type; 2879 2880 switch (pGpuExternalMappingInfo->formatType) 2881 { 2882 case UvmRmGpuFormatTypeBlockLinear: 2883 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _FORMAT, _BLOCK_LINEAR, fbAllocPageFormat.attr); 2884 break; 2885 default: 2886 break; 2887 } 2888 2889 switch (pGpuExternalMappingInfo->elementBits) 2890 { 2891 case UvmRmGpuFormatElementBits8: 2892 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _8, fbAllocPageFormat.attr); 2893 break; 2894 case UvmRmGpuFormatElementBits16: 2895 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _16, fbAllocPageFormat.attr); 2896 break; 2897 // CUDA does not support 24-bit width 2898 case UvmRmGpuFormatElementBits32: 2899 fbAllocPageFormat.attr = 
FLD_SET_DRF(OS32, _ATTR, _DEPTH, _32, fbAllocPageFormat.attr); 2900 break; 2901 case UvmRmGpuFormatElementBits64: 2902 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _64, fbAllocPageFormat.attr); 2903 break; 2904 case UvmRmGpuFormatElementBits128: 2905 fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _128, fbAllocPageFormat.attr); 2906 break; 2907 default: 2908 break; 2909 } 2910 2911 status = memmgrChooseKind_HAL(pMappingGpu, pMemoryManager, &fbAllocPageFormat, 2912 DRF_VAL(OS32, _ATTR, _COMPR, fbAllocPageFormat.attr), 2913 newKind); 2914 if (status != NV_OK) 2915 { 2916 NV_PRINTF(LEVEL_ERROR, "Invalid kind type (%x)\n", *newKind); 2917 return status; 2918 } 2919 2920 // 2921 // Check that the memory descriptor already has allocated comptags 2922 // if the new mapping enables compression. Downgrade the kind if no 2923 // comptags are present. 2924 // 2925 ctagId = FB_HWRESID_CTAGID_VAL_FERMI(memdescGetHwResId(pMemDesc)); 2926 if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, *newKind) && !ctagId) 2927 *newKind = memmgrGetUncompressedKind_HAL(pMappingGpu, pMemoryManager, *newKind, NV_FALSE); 2928 2929 if (*newKind == NV_MMU_PTE_KIND_INVALID) 2930 return NV_ERR_INVALID_ARGUMENT; 2931 } 2932 else 2933 { 2934 NV_ASSERT((pGpuExternalMappingInfo->elementBits == UvmRmGpuFormatElementBitsDefault) || 2935 (pGpuExternalMappingInfo->elementBits == UvmRmGpuFormatElementBits8)); 2936 } 2937 2938 return NV_OK; 2939 } 2940 2941 static 2942 NV_STATUS 2943 nvGpuOpsMemGetPageSize 2944 ( 2945 OBJGPU *pGpu, 2946 MEMORY_DESCRIPTOR *pMemDesc, 2947 NvU64 *pPageSize 2948 ) 2949 { 2950 NvU64 pageSize; 2951 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 2952 NV_STATUS status; 2953 2954 pageSize = memdescGetPageSize(pMemDesc, AT_GPU); 2955 if (pageSize == 0) 2956 { 2957 status = memmgrSetMemDescPageSize_HAL(pGpu, 2958 pMemoryManager, 2959 pMemDesc, 2960 AT_GPU, 2961 RM_ATTR_PAGE_SIZE_DEFAULT); 2962 if (status != NV_OK) 2963 return status; 2964 2965 pageSize = memdescGetPageSize(pMemDesc, AT_GPU); 2966 NV_ASSERT(pageSize != 0); 2967 } 2968 2969 *pPageSize = pageSize; 2970 2971 return NV_OK; 2972 } 2973 2974 static 2975 NV_STATUS 2976 nvGpuOpsBuildExternalAllocPtes 2977 ( 2978 OBJVASPACE *pVAS, 2979 OBJGPU *pMappingGpu, 2980 MEMORY_DESCRIPTOR *pMemDesc, 2981 Memory *pMemory, 2982 NvU64 offset, 2983 NvU64 size, 2984 NvBool isIndirectPeerSupported, 2985 NvBool isPeerSupported, 2986 NvU32 peerId, 2987 gpuExternalMappingInfo *pGpuExternalMappingInfo 2988 ) 2989 { 2990 NV_STATUS status = NV_OK; 2991 OBJGVASPACE *pGVAS = NULL; 2992 const GMMU_FMT *pFmt = NULL; 2993 const GMMU_FMT_PTE *pPteFmt = NULL; 2994 const MMU_FMT_LEVEL *pLevelFmt = NULL; 2995 GMMU_APERTURE aperture; 2996 COMPR_INFO comprInfo; 2997 GMMU_ENTRY_VALUE pte = {{0}}; 2998 2999 NvU64 fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR; 3000 NvU32 kind; 3001 NvU64 pageSize; 3002 NvU32 skipPteCount; 3003 NvBool vol, atomic, readOnly; 3004 NvBool encrypted, privileged; 3005 NvU64 iter, physAddr, mappingSize, pteCount; 3006 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pMappingGpu); 3007 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pMappingGpu); 3008 NvU64 allocSize; 3009 NvBool isCompressedKind; 3010 NvU64 *physicalAddresses = NULL; 3011 NvU32 newKind, oldKind; 3012 NvBool kindChanged = NV_FALSE; 3013 NvU64 gpaOffset; 3014 NvBool *isPLCable = NULL; 3015 NvU64 *guestPhysicalAddress = NULL; 3016 NvU64 mappingPageSize = pGpuExternalMappingInfo->mappingPageSize; 3017 3018 
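//
// Overview (annotation, not from the original source): the remainder of
// this function computes a PTE template and stamps out one PTE per
// mappingPageSize chunk:
//   1) validate offset/size/mappingPageSize against the allocation,
//   2) look up the GMMU format and page-table level for mappingPageSize,
//   3) resolve kind/compression, aperture, volatility, and access rights,
//   4) encode the physical addresses and write pteCount entries into
//      pGpuExternalMappingInfo->pteBuffer.
//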
NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
3019
3020 status = nvGpuOpsMemGetPageSize(pMappingGpu,
3021 pMemDesc,
3022 &pageSize);
3023 if (status != NV_OK)
3024 return status;
3025
3026 //
3027 // Default mappingPageSize to the allocation's page size if passed as 0.
3028 // If mappingPageSize is non-zero, it must be a factor of pageSize.
3029 // Also, the mapping page size cannot be larger than the alloc page size.
3030 //
3031 if (mappingPageSize == 0)
3032 {
3033 mappingPageSize = pageSize;
3034 }
3035 else if ((mappingPageSize > pageSize) ||
3036 (pageSize % mappingPageSize != 0))
3037 {
3038 return NV_ERR_INVALID_ARGUMENT;
3039 }
3040
3041 // memdescGetSize returns the requested size of the allocation. But the
3042 // actual allocation size could be larger than the requested size due
3043 // to alignment requirements. So, make sure the correct size is used.
3044 // Note, alignment can be greater than the pageSize.
3045 allocSize = RM_ALIGN_UP(pMemDesc->ActualSize, pageSize);
3046
3047 if (offset >= allocSize)
3048 return NV_ERR_INVALID_BASE;
3049
3050 if ((offset + size) > allocSize)
3051 return NV_ERR_INVALID_LIMIT;
3052
3053 if ((size & (mappingPageSize - 1)) != 0)
3054 return NV_ERR_INVALID_ARGUMENT;
3055
3056 if ((offset & (mappingPageSize - 1)) != 0)
3057 return NV_ERR_INVALID_ARGUMENT;
3058
3059 pGVAS = dynamicCast(pVAS, OBJGVASPACE);
3060
3061 // Get the GMMU format
3062 pFmt = gvaspaceGetGmmuFmt(pGVAS, pMappingGpu);
3063 pPteFmt = (GMMU_FMT_PTE*)pFmt->pPte;
3064 pLevelFmt = mmuFmtFindLevelWithPageShift(pFmt->pRoot, BIT_IDX_64(mappingPageSize));
3065
3066 oldKind = newKind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);
3067 if (pMemory)
3068 {
3069 //
3070 // The physical memory layout can be specified after allocation using
3071 // UvmMapExternalAllocation, so the kind attribute needs to be computed
3072 // again
3073 //
3074 status = nvGpuOpsGetPteKind(pMappingGpu, pMemoryManager, pMemDesc, pMemory,
3075 pGpuExternalMappingInfo, &newKind);
3076
3077 if (status != NV_OK)
3078 return status;
3079
3080 if (oldKind != newKind)
3081 {
3082 memdescSetPteKindForGpu(pMemDesc, pMappingGpu, newKind);
3083 kindChanged = NV_TRUE;
3084 }
3085 }
3086
3087 // Get the CompTag range and Kind.
3088 status = memmgrGetKindComprForGpu_HAL(pMemoryManager, pMemDesc, pMappingGpu, 0, &kind, &comprInfo);
3089 if (status != NV_OK)
3090 return status;
3091
3092 if (kindChanged)
3093 memdescSetPteKindForGpu(pMemDesc, pMappingGpu, oldKind);
3094
3095 aperture = nvGpuOpsGetExternalAllocAperture(pMemDesc, isIndirectPeerSupported, isPeerSupported);
3096
3097 vol = nvGpuOpsGetExternalAllocVolatility(pMemDesc, aperture, isIndirectPeerSupported,
3098 pGpuExternalMappingInfo->cachingType);
3099
3100 status = nvGpuOpsGetExternalAllocMappingAttribute(pGpuExternalMappingInfo->mappingType,
3101 pMemDesc,
3102 &readOnly,
3103 &atomic);
3104 if (status != NV_OK)
3105 return status;
3106
3107 encrypted = memdescGetFlag(pMemDesc, MEMDESC_FLAGS_ENCRYPTED);
3108
3109 privileged = memdescGetFlag(pMemDesc, MEMDESC_FLAGS_GPU_PRIVILEGED);
3110
3111 mappingSize = size ? size : allocSize;
3112
3113 skipPteCount = pLevelFmt->entrySize / sizeof(NvU64);
3114
3115 isCompressedKind = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind);
3116
3117 //
3118 // Specifying mapping page size for compressed
3119 // allocations is not yet supported.
3120 //
3121 if (isCompressedKind && (pGpuExternalMappingInfo->mappingPageSize != 0) &&
3122 (pGpuExternalMappingInfo->mappingPageSize != pageSize))
3123 {
3124 return NV_ERR_NOT_SUPPORTED;
3125 }
3126
3127 pteCount = NV_MIN((pGpuExternalMappingInfo->pteBufferSize / pLevelFmt->entrySize),
3128 (mappingSize / mappingPageSize));
3129 if (!pteCount)
3130 return NV_ERR_BUFFER_TOO_SMALL;
3131
3132 if (pFmt->version == GMMU_FMT_VERSION_3)
3133 {
3134 NvU32 ptePcfSw = 0;
3135 NvU32 ptePcfHw = 0;
3136
3137 nvFieldSetBool(&pPteFmt->fldValid, NV_TRUE, pte.v8);
3138 gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
3139 nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);
3140
3141 ptePcfSw |= vol ? (1 << SW_MMU_PCF_UNCACHED_IDX) : 0;
3142 ptePcfSw |= readOnly ? (1 << SW_MMU_PCF_RO_IDX) : 0;
3143 ptePcfSw |= !atomic ? (1 << SW_MMU_PCF_NOATOMIC_IDX) : 0;
3144 ptePcfSw |= !privileged ? (1 << SW_MMU_PCF_REGULAR_IDX) : 0;
3145
3146 if ((memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC))
3147 {
3148 ptePcfSw |= (1 << SW_MMU_PCF_ACE_IDX);
3149 }
3150
3151 NV_CHECK_OR_RETURN(LEVEL_ERROR,
3152 (kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
3153 NV_ERR_INVALID_ARGUMENT);
3154 nvFieldSet32(&pPteFmt->fldPtePcf, ptePcfHw, pte.v8);
3155 }
3156 else
3157 {
3158 if (nvFieldIsValid32(&pPteFmt->fldValid.desc))
3159 nvFieldSetBool(&pPteFmt->fldValid, NV_TRUE, pte.v8);
3160
3161 if (nvFieldIsValid32(&pPteFmt->fldVolatile.desc))
3162 nvFieldSetBool(&pPteFmt->fldVolatile, vol, pte.v8);
3163
3164 if (nvFieldIsValid32(&pPteFmt->fldPrivilege.desc))
3165 nvFieldSetBool(&pPteFmt->fldPrivilege, privileged, pte.v8);
3166
3167 if (nvFieldIsValid32(&pPteFmt->fldEncrypted.desc))
3168 nvFieldSetBool(&pPteFmt->fldEncrypted, encrypted, pte.v8);
3169
3170 if (nvFieldIsValid32(&pPteFmt->fldReadOnly.desc))
3171 nvFieldSetBool(&pPteFmt->fldReadOnly, readOnly, pte.v8);
3172
3173 if (nvFieldIsValid32(&pPteFmt->fldWriteDisable.desc))
3174 nvFieldSetBool(&pPteFmt->fldWriteDisable, readOnly, pte.v8);
3175
3176 if (nvFieldIsValid32(&pPteFmt->fldReadDisable.desc))
3177 nvFieldSetBool(&pPteFmt->fldReadDisable, NV_FALSE, pte.v8);
3178
3179 if (nvFieldIsValid32(&pPteFmt->fldAtomicDisable.desc))
3180 nvFieldSetBool(&pPteFmt->fldAtomicDisable, !atomic, pte.v8);
3181
3182 gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
3183
3184 if (!isCompressedKind)
3185 {
3186 nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);
3187 nvFieldSet32(&pPteFmt->fldCompTagLine, 0, pte.v8);
3188 if (nvFieldIsValid32(&pPteFmt->fldCompTagSubIndex))
3189 nvFieldSet32(&pPteFmt->fldCompTagSubIndex, 0, pte.v8);
3190 }
3191 }
3192
3193 if (aperture == GMMU_APERTURE_PEER)
3194 {
3195 FlaMemory* pFlaMemory = dynamicCast(pMemory, FlaMemory);
3196 nvFieldSet32(&pPteFmt->fldPeerIndex, peerId, pte.v8);
3197
3198 if (
3199 (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC) ||
3200 (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_V2) || pFlaMemory)
3201 {
3202 //
3203 // Any fabric memory descriptors are pre-encoded with the fabric base address;
3204 // use NVLINK_INVALID_FABRIC_ADDR to avoid encoding it twice
3205 //
3206 fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR;
3207 }
3208 else
3209 {
3210 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMemDesc->pGpu);
3211 if (pKernelNvlink == NULL)
3212 {
3213 fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR;
3214 }
3215 else
3216 {
3217 fabricBaseAddress = knvlinkGetUniqueFabricBaseAddress(pMemDesc->pGpu, pKernelNvlink);
3218 }
3219 }
3220 }
3221
3222 //
3223 // Both memdescGetPhysAddr() and kgmmuEncodePhysAddr() have pretty high overhead.
3224 // To avoid it, allocate an array for the physical addresses and use the
3225 // flavors of the APIs that work on multiple addresses at a time.
3226 //
3227 // Notably the pteBuffer array could be re-used for that, but it gets a bit
3228 // tricky if skipPteCount is greater than 1 so just keep it simple.
3229 //
3230 physicalAddresses = portMemAllocNonPaged((NvU32)pteCount * sizeof(*physicalAddresses));
3231 if (physicalAddresses == NULL)
3232 return NV_ERR_NO_MEMORY;
3233
3234 //
3235 // Ask for physical addresses for the GPU being mapped as it may not be the
3236 // same as the GPU owning the memdesc. This matters for sysmem as accessing
3237 // it requires IOMMU mappings to be set up and these are different for each
3238 // GPU. The IOMMU mappings are currently added by nvGpuOpsDupMemory().
3239 //
3240 memdescGetPhysAddrsForGpu(pMemDesc, pMappingGpu, AT_GPU, offset, mappingPageSize,
3241 pteCount, physicalAddresses);
3242 kgmmuEncodePhysAddrs(pKernelGmmu, aperture, physicalAddresses, fabricBaseAddress, pteCount);
3243
3244
3245 //
3246 // Get information about whether each given physical address needs a
3247 // PLC-able kind
3248 //
3249 if (IS_VIRTUAL_WITH_SRIOV(pMappingGpu) &&
3250 gpuIsWarBug200577889SriovHeavyEnabled(pMappingGpu) &&
3251 isCompressedKind &&
3252 !memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_DISALLOW_PLC, comprInfo.kind))
3253 {
3254 guestPhysicalAddress = portMemAllocNonPaged((NvU32)pteCount * sizeof(*guestPhysicalAddress));
3255 if (guestPhysicalAddress == NULL)
3256 {
3257 status = NV_ERR_NO_MEMORY;
3258 goto done;
3259 }
3260
3261 portMemSet(guestPhysicalAddress, 0, ((NvU32)pteCount * sizeof(*guestPhysicalAddress)));
3262
3263 gpaOffset = offset;
3264 for (iter = 0; iter < pteCount; iter++)
3265 {
3266 guestPhysicalAddress[iter] = gpaOffset;
3267 gpaOffset += mappingPageSize;
3268 }
3269
3270 isPLCable = portMemAllocNonPaged((NvU32)pteCount * sizeof(*isPLCable));
3271 if (isPLCable == NULL)
3272 {
3273 status = NV_ERR_NO_MEMORY;
3274 goto done;
3275 }
3276
3277 portMemSet(isPLCable, 0, ((NvU32)pteCount * sizeof(*isPLCable)));
3278
3279 NV_RM_RPC_GET_PLCABLE_ADDRESS_KIND(pMappingGpu, guestPhysicalAddress, mappingPageSize, (NvU32)pteCount,
3280 isPLCable, status);
3281 if (status != NV_OK)
3282 goto done;
3283 }
3284
3285 for (iter = 0; iter < pteCount; iter++)
3286 {
3287 physAddr = physicalAddresses[iter];
3288
3289 gmmuFieldSetAddress(gmmuFmtPtePhysAddrFld(pPteFmt, aperture),
3290 physAddr,
3291 pte.v8);
3292
3293 if (isCompressedKind)
3294 {
3295 // We have to reset the pte.v8 fields in case of partially compressed allocations.
3296 // Otherwise, non-compressed PTEs will get bits from compressed PTEs.
3296 if (pFmt->version <= GMMU_FMT_VERSION_2)
3297 {
3298 NvBool bIsWarApplied = NV_FALSE;
3299 NvU32 savedKind = comprInfo.kind;
3300 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pMappingGpu);
3301 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pMappingGpu);
3302 const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
3303 kmemsysGetStaticConfig(pMappingGpu, pKernelMemorySystem);
3304
3305 nvFieldSet32(&pPteFmt->fldKind, 0, pte.v8);
3306 nvFieldSet32(&pPteFmt->fldCompTagLine, 0, pte.v8);
3307 if (nvFieldIsValid32(&pPteFmt->fldCompTagSubIndex))
3308 nvFieldSet32(&pPteFmt->fldCompTagSubIndex, 0, pte.v8);
3309
3310 if (pMemorySystemConfig->bUseRawModeComptaglineAllocation &&
3311 pMemorySystemConfig->bDisablePlcForCertainOffsetsBug3046774 &&
3312 !memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_DISALLOW_PLC, comprInfo.kind))
3313 {
3314 NvBool bEnablePlc = NV_TRUE;
3315
3316 if (IS_VIRTUAL_WITH_SRIOV(pMappingGpu) &&
3317 gpuIsWarBug200577889SriovHeavyEnabled(pMappingGpu))
3318 {
3319 bEnablePlc = isPLCable[iter];
3320 }
3321 else
3322 {
3323 bEnablePlc = kmemsysIsPagePLCable_HAL(pMappingGpu, pKernelMemorySystem,
3324 offset, mappingPageSize);
3325 }
3326
3327 if (!bEnablePlc)
3328 {
3329 bIsWarApplied = NV_TRUE;
3330 memmgrGetDisablePlcKind_HAL(pMemoryManager, &comprInfo.kind);
3331 }
3332 }
3333
3334 kgmmuFieldSetKindCompTags(GPU_GET_KERNEL_GMMU(pMappingGpu), pFmt, pLevelFmt, &comprInfo, physAddr,
3335 offset, mmuFmtVirtAddrToEntryIndex(pLevelFmt, offset), pte.v8);
3336 //
3337 // Restore the kind to PLC if changed, since the kind is associated with the entire surface, and the WAR applies to
3338 // individual pages in the surface.
3339 if (bIsWarApplied)
3340 comprInfo.kind = savedKind;
3341 }
3342 }
3343
3344 portMemCopy(&pGpuExternalMappingInfo->pteBuffer[iter * skipPteCount], pLevelFmt->entrySize, pte.v8, pLevelFmt->entrySize);
3345
3346 offset += mappingPageSize;
3347 }
3348
3349 pGpuExternalMappingInfo->numWrittenPtes = pteCount;
3350 pGpuExternalMappingInfo->numRemainingPtes = (mappingSize / mappingPageSize) - pteCount;
3351 pGpuExternalMappingInfo->pteSize = pLevelFmt->entrySize;
3352
3353 done:
3354 portMemFree(physicalAddresses);
3355
3356 portMemFree(guestPhysicalAddress);
3357
3358 portMemFree(isPLCable);
3359
3360 return status;
3361 }
3362
3363 NV_STATUS nvGpuOpsGetExternalAllocPtes(struct gpuAddressSpace *vaSpace,
3364 NvHandle hMemory,
3365 NvU64 offset,
3366 NvU64 size,
3367 gpuExternalMappingInfo *pGpuExternalMappingInfo)
3368 {
3369 NV_STATUS status = NV_OK;
3370 nvGpuOpsLockSet acquiredLocks;
3371 THREAD_STATE_NODE threadState;
3372 Memory *pMemory = NULL;
3373 PMEMORY_DESCRIPTOR pMemDesc = NULL;
3374 OBJGPU *pMappingGpu = NULL;
3375 NvU32 peerId = 0;
3376 NvBool isSliSupported = NV_FALSE;
3377 NvBool isPeerSupported = NV_FALSE;
3378 NvBool isIndirectPeerSupported = NV_FALSE;
3379 OBJVASPACE *pVAS = NULL;
3380 FlaMemory *pFlaMemory = NULL;
3381 OBJGPU *pSrcGpu = NULL;
3382 OBJGPU *pPeerGpu = NULL;
3383 RsClient *pClient;
3384 MEMORY_DESCRIPTOR *pAdjustedMemDesc = NULL;
3385 FABRIC_VASPACE *pFabricVAS = NULL;
3386
3387 if (!pGpuExternalMappingInfo || !hMemory || !vaSpace)
3388 return NV_ERR_INVALID_ARGUMENT;
3389
3390 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
3391 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
3392 vaSpace->device->session->handle,
3393 &pClient,
3394 &acquiredLocks);
3395 if (status != NV_OK)
3396 {
3397 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
3398 return status;
3399 }
3400
3401 status = vaspaceGetByHandleOrDeviceDefault(pClient,
3402 vaSpace->device->handle,
3403 vaSpace->handle,
3404 &pVAS);
3405 if (status != NV_OK)
3406 goto done;
3407
3408 status = nvGpuOpsGetMemoryByHandle(vaSpace->device->session->handle,
3409 hMemory,
3410 &pMemory);
3411 if (status != NV_OK)
3412 goto done;
3413
3414 // RM client allocations can't have multiple subDevice memdescs.
3415 pMemDesc = pMemory->pMemDesc;
3416 NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
3417
3418 // Do not support mapping on anything other than sysmem/vidmem/fabric!
3419 if ((memdescGetAddressSpace(pMemDesc) != ADDR_SYSMEM) &&
3420 (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) &&
3421 (memdescGetAddressSpace(pMemDesc) != ADDR_FABRIC_MC) &&
3422 (memdescGetAddressSpace(pMemDesc) != ADDR_FABRIC_V2))
3423 {
3424 status = NV_ERR_NOT_SUPPORTED;
3425 goto done;
3426 }
3427
3428 status = CliSetGpuContext(vaSpace->device->session->handle,
3429 vaSpace->device->handle,
3430 &pMappingGpu,
3431 NULL);
3432 if (status != NV_OK)
3433 goto done;
3434
3435 pAdjustedMemDesc = pMemDesc;
3436 pFabricVAS = dynamicCast(pMappingGpu->pFabricVAS, FABRIC_VASPACE);
3437 if (pFabricVAS != NULL)
3438 {
3439 status = fabricvaspaceGetGpaMemdesc(pFabricVAS, pMemDesc, pMappingGpu, &pAdjustedMemDesc);
3440 if (status != NV_OK)
3441 goto done;
3442 }
3443
3444 // Check if P2P is supported
3445 if (
3446 (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_MC) ||
3447 (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_V2))
3448 {
3449 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMappingGpu);
3450
3451 isPeerSupported = NV_TRUE;
3452 pPeerGpu = pAdjustedMemDesc->pGpu;
3453 peerId = BUS_INVALID_PEER;
3454
3455 if (!memIsGpuMapAllowed(pMemory, pMappingGpu))
3456 {
3457 NV_PRINTF(LEVEL_ERROR,
3458 "Mapping Gpu is not attached to the given memory object\n");
3459 status = NV_ERR_INVALID_STATE;
3460 goto freeGpaMemdesc;
3461 }
3462
3463 if (pPeerGpu != NULL)
3464 {
3465 if ((pKernelNvlink != NULL) &&
3466 knvlinkIsNvlinkP2pSupported(pMappingGpu, pKernelNvlink, pPeerGpu))
3467 {
3468 peerId = kbusGetPeerId_HAL(pMappingGpu, GPU_GET_KERNEL_BUS(pMappingGpu), pPeerGpu);
3469 }
3470 }
3471 else
3472 {
3473 peerId = kbusGetNvSwitchPeerId_HAL(pMappingGpu,
3474 GPU_GET_KERNEL_BUS(pMappingGpu));
3475 }
3476
3477 if (peerId == BUS_INVALID_PEER)
3478 {
3479 status = NV_ERR_INVALID_STATE;
3480 goto freeGpaMemdesc;
3481 }
3482 }
3483 else if (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FBMEM &&
3484 (pAdjustedMemDesc->pGpu->gpuId != pMappingGpu->gpuId ||
3485 dynamicCast(pMemory, FlaMemory)))
3486 {
3487 if (gpumgrCheckIndirectPeer(pAdjustedMemDesc->pGpu, pMappingGpu))
3488 {
3489 isIndirectPeerSupported = NV_TRUE;
3490 }
3491 else
3492 {
3493 pFlaMemory = dynamicCast(pMemory, FlaMemory);
3494 if (pFlaMemory != NULL)
3495 {
3496 pSrcGpu = gpumgrGetGpu(pFlaMemory->peerGpuInst);
3497 if (!pSrcGpu)
3498 {
3499 status = NV_ERR_INVALID_ARGUMENT;
3500 goto freeGpaMemdesc;
3501 }
3502 }
3503
3504 status = nvGpuOpsGetExternalAllocP2pInfo(vaSpace->device->session,
3505 (pFlaMemory) ? (pSrcGpu->gpuId) : (pAdjustedMemDesc->pGpu->gpuId),
3506 pMappingGpu->gpuId,
3507 &isPeerSupported,
3508 &peerId);
3509 if (status != NV_OK)
3510 goto freeGpaMemdesc;
3511 }
3512
3513 //
3514 // If GPUs are in the same SLI group, don't do peer mappings even if the GPUs are different. In an SLI config,
3515 // a caller may try to map memory on a GPU other than the one associated with the memdesc; always return a
3516 // local VIDMEM mapping because RM shares a memdesc among such GPUs for client allocations.
3517 // Note: This check could be avoided if we could know that pMemDesc->pGpu is always the SLI master i.e. same
3518 // as the pGPU returned by CliSetGpuContext.
3519 //
3520 if (!pFlaMemory && pAdjustedMemDesc->pGpu->deviceInstance == pMappingGpu->deviceInstance)
3521 {
3522 isPeerSupported = NV_FALSE;
3523 isSliSupported = NV_TRUE;
3524 }
3525
3526 // Even if the RM returns P2P or indirect peer supported, make sure the GPUs are not from different SLI groups. See Bug# 759980.
        if ((isPeerSupported || isIndirectPeerSupported) &&
            (IsSLIEnabled(pMappingGpu) || IsSLIEnabled(pAdjustedMemDesc->pGpu)))
        {
            status = NV_ERR_NOT_SUPPORTED;
            goto freeGpaMemdesc;
        }

        NV_ASSERT(!(isPeerSupported && isSliSupported));

        // If a caller is trying to map VIDMEM on GPUs that have no P2P support and are not in the same SLI group, error out.
        if (!isPeerSupported && !isIndirectPeerSupported && !isSliSupported)
        {
            status = NV_ERR_NOT_SUPPORTED;
            goto freeGpaMemdesc;
        }
    }

    status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pAdjustedMemDesc, pMemory, offset, size,
                                            isIndirectPeerSupported, isPeerSupported, peerId,
                                            pGpuExternalMappingInfo);

freeGpaMemdesc:
    if (pAdjustedMemDesc != pMemDesc)
        fabricvaspacePutGpaMemdesc(pFabricVAS, pAdjustedMemDesc);

done:
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

void nvGpuOpsAddressSpaceDestroy(struct gpuAddressSpace *vaSpace)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(vaSpace->dummyGpuAlloc.refCount == 0);

    // Free all the GPU memory descriptors
    if (vaSpace->allocations)
    {
        portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
        destroyAllGpuMemDescriptors(vaSpace->device->session->handle,
                                    vaSpace->allocations);
        portSyncRwLockReleaseWrite(vaSpace->allocationsLock);
    }

    // Free all the physical allocations
    if (vaSpace->physAllocations)
    {
        portSyncRwLockAcquireWrite(vaSpace->physAllocationsLock);
        destroyAllGpuMemDescriptors(vaSpace->device->session->handle,
                                    vaSpace->physAllocations);
        portSyncRwLockReleaseWrite(vaSpace->physAllocationsLock);
    }

    // Destroy CPU mappings
    if (vaSpace->cpuMappings)
    {
        portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
        btreeDestroyData(vaSpace->cpuMappings);
        portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
    }

    if (vaSpace->handle)
        pRmApi->Free(pRmApi, vaSpace->device->session->handle, vaSpace->handle);

    portSyncRwLockDestroy(vaSpace->allocationsLock);
    portSyncRwLockDestroy(vaSpace->cpuMappingsLock);
    portSyncRwLockDestroy(vaSpace->physAllocationsLock);

    portMemFree(vaSpace);
}

static NV_STATUS nvGpuOpsAllocPhysical(struct gpuDevice *device,
                                       NvBool isSystemMemory,
                                       NvLength length,
                                       NvU64 *paOffset,
                                       gpuAllocInfo *allocInfo)
{
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams = {0};
    NV_STATUS status = NV_OK;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NvHandle physHandle = 0;

    NV_ASSERT(allocInfo);
    NV_ASSERT(device);
    NV_ASSERT(paOffset);

    // Allocate the physical memory in either sysmem or FB.
    memAllocParams.owner = HEAP_OWNER_RM_KERNEL_CLIENT;

    // Physical allocations don't expect vaSpace handles
    memAllocParams.hVASpace = 0;

    // Reset previous offset
    memAllocParams.offset = 0;

    memAllocParams.size = length;
    memAllocParams.type = NVOS32_TYPE_IMAGE;
    memAllocParams.attr = isSystemMemory ?
                          DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) :
                          DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);

    // Always enable caching for system memory as all the currently supported
    // platforms are IO coherent.
    memAllocParams.attr |= isSystemMemory ?
                          DRF_DEF(OS32, _ATTR, _COHERENCY, _CACHED) :
                          DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);

    // Allocate a contiguous allocation if requested by the client
    memAllocParams.attr |= allocInfo->bContiguousPhysAlloc ?
                           DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS) :
                           DRF_DEF(OS32, _ATTR, _PHYSICALITY, _DEFAULT);

    // Set pageSize for the PA allocation. The RM default is the big page size.
    switch (allocInfo->pageSize)
    {
        case RM_PAGE_SIZE:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB);
            break;
        case RM_PAGE_SIZE_64K:
        case RM_PAGE_SIZE_128K:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG);
            break;
        case RM_PAGE_SIZE_HUGE:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
            memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB);
            break;
        case RM_PAGE_SIZE_512M:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
            memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB);
            break;
        default:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
            break;
    }

    // Check whether the allocation needs to go at the top of FB
    if (allocInfo->bMemGrowsDown)
        memAllocParams.flags |= NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN;

    // Ask RM to allocate persistent video memory
    if (!isSystemMemory && allocInfo->bPersistentVidmem)
        memAllocParams.flags |= NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM;

    //
    // Indicate to the RM that the allocation should be in unprotected memory.
    // If the Confidential Computing feature is not enabled on the system, this
    // flag has no effect.
    //
    if (allocInfo->bUnprotected)
    {
        memAllocParams.attr2 = FLD_SET_DRF(OS32, _ATTR2, _MEMORY_PROTECTION,
                                           _UNPROTECTED, memAllocParams.attr2);
    }

    //
    // Vid heap ctrl has a different policy compared to other internal APIs:
    // it expects the GPU lock to not be held, so we have to drop the GPU lock
    // here. That is safe in this scenario because we still hold the API lock
    // and nothing from a GPU interrupt can change anything in the OPS state.
    //

    physHandle = NV01_NULL_OBJECT;
    NV_ASSERT_OK_OR_GOTO(status, pRmApi->Alloc(pRmApi,
                                               device->session->handle,
                                               isSystemMemory ? device->handle : device->subhandle,
                                               &physHandle,
                                               isSystemMemory ? NV01_MEMORY_SYSTEM : NV01_MEMORY_LOCAL_USER,
                                               &memAllocParams,
                                               sizeof(memAllocParams)), done);
    if (allocInfo->bContiguousPhysAlloc)
        allocInfo->gpuPhysOffset = memAllocParams.offset;

    allocInfo->hPhysHandle = physHandle;
    *paOffset = (NvU64)allocInfo->gpuPhysOffset;

done:

    if (status != NV_OK)
        pRmApi->Free(pRmApi, device->session->handle, physHandle);

    return status;
}
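
//
// Both the physical and the virtual allocation paths encode the requested
// page size in the NVOS32 attribute fields. As a worked example (values taken
// from the switch statements in this file), a 2MB huge-page request expands
// to:
//
//     memAllocParams.attr  |= DRF_DEF(OS32, _ATTR,  _PAGE_SIZE,      _HUGE);
//     memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB);
//
// while 64K and 128K requests both map to the GPU's "big" page size.
//
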
// This call allocates virtual memory and associates a PA with it.
static NV_STATUS nvGpuOpsAllocVirtual(struct gpuAddressSpace *vaSpace,
                                      NvLength length,
                                      NvU64 *vaOffset,
                                      NvHandle physHandle,
                                      struct allocFlags flags,
                                      gpuVaAllocInfo *allocInfo)
{
    NV_MEMORY_ALLOCATION_PARAMS memAllocParams = { 0 };
    NV_STATUS status;
    gpuMemDesc *memDesc = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(allocInfo);
    NV_ASSERT(vaSpace);
    NV_ASSERT(vaOffset);
    NV_ASSERT(physHandle);

    memDesc = portMemAllocNonPaged(sizeof(*memDesc));
    if (memDesc == NULL)
        return NV_ERR_NO_MEMORY;

    // First allocate the virtual memory

    memAllocParams.owner = HEAP_OWNER_RM_KERNEL_CLIENT;
    memAllocParams.size = length;
    memAllocParams.type = NVOS32_TYPE_IMAGE;
    memAllocParams.alignment = allocInfo->alignment ? allocInfo->alignment : NV_GPU_SMALL_PAGESIZE;
    memAllocParams.flags = NVOS32_ALLOC_FLAGS_VIRTUAL |
                           NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE |
                           NVOS32_ALLOC_FLAGS_ALLOCATE_KERNEL_PRIVILEGED;

    // Set pageSize for the VA allocation. The RM default is the big page size.
    switch (allocInfo->pageSize)
    {
        case RM_PAGE_SIZE:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB);
            break;
        case RM_PAGE_SIZE_64K:
        case RM_PAGE_SIZE_128K:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG);
            break;
        case RM_PAGE_SIZE_HUGE:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
            memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB);
            break;
        case RM_PAGE_SIZE_512M:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
            memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB);
            break;
        default:
            memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
            break;
    }

    memAllocParams.hVASpace = vaSpace->handle;

    memDesc->handle = NV01_NULL_OBJECT;
    NV_ASSERT_OK_OR_GOTO(status, pRmApi->Alloc(pRmApi,
                                               vaSpace->device->session->handle,
                                               vaSpace->device->handle,
                                               &memDesc->handle,
                                               NV50_MEMORY_VIRTUAL,
                                               &memAllocParams,
                                               sizeof(memAllocParams)), done);
    memDesc->address = (NvU64)memAllocParams.offset;
    memDesc->size = length;
    memDesc->childHandle = physHandle;

    portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
    status = trackDescriptor(&vaSpace->allocations, memDesc->address, memDesc);
    portSyncRwLockReleaseWrite(vaSpace->allocationsLock);

    if (status != NV_OK)
        goto done;

    // Return the allocated GPU VA
    *vaOffset = memDesc->address;

done:

    if (status != NV_OK)
        pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->handle);

    if ((status != NV_OK) && (memDesc != NULL))
        portMemFree(memDesc);

    return status;
}
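
//
// nvGpuOpsAllocVirtual() only reserves GPU VA and records the PA handle in
// the VA memdesc; it does not program the GMMU. The paired call sequence
// (sketch; error handling elided) is exactly what nvGpuOpsGpuMalloc() does
// further below:
//
//     status = nvGpuOpsAllocVirtual(vaSpace, length, &vaOffset, physHandle,
//                                   flags, &vaAllocInfo);
//     status = nvGpuOpsMapGpuMemory(vaSpace, vaOffset, length,
//                                   vaAllocInfo.pageSize, &gpuOffset, flags);
//
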
// TODO: will need to support an offset within the allocation
static NV_STATUS nvGpuOpsMapGpuMemory(struct gpuAddressSpace *vaSpace,
                                      NvU64 vaOffset,
                                      NvLength length,
                                      NvU64 pageSize,
                                      NvU64 *gpuOffset,
                                      struct allocFlags flags)
{
    gpuMemDesc *memDescVa = NULL;
    NV_STATUS status;
    NvU64 mappedVa = 0;
    NvU32 mapFlags = 0;
    NvU64 mapPageSize = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!vaSpace || !gpuOffset)
        return NV_ERR_INVALID_ARGUMENT;

    portSyncRwLockAcquireRead(vaSpace->allocationsLock);
    status = findDescriptor(vaSpace->allocations, vaOffset, (void**)&memDescVa);
    portSyncRwLockReleaseRead(vaSpace->allocationsLock);
    if (status != NV_OK)
        return status;

    NV_ASSERT(memDescVa);
    NV_ASSERT(memDescVa->handle);
    NV_ASSERT(memDescVa->childHandle);

    if (pageSize == RM_PAGE_SIZE)
    {
        mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _4KB);
    }
    else if (pageSize == RM_PAGE_SIZE_HUGE)
    {
        // TODO: this flag is ignored, remove it once it is deprecated
        mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _HUGE);
    }
    else
    {
        mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _DEFAULT);
    }

    // Build the flags for mapping the two surfaces
    mapFlags |= ((flags.bGetKernelVA) ? DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE) :
                                        DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _NONE));
    mapFlags |= mapPageSize;

    // Always enable snooping as that's what's needed for sysmem allocations and
    // it's ignored for vidmem.
    mapFlags |= DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);

    // Map the two surfaces
    status = pRmApi->Map(pRmApi,
                         vaSpace->device->session->handle,
                         vaSpace->device->handle,
                         memDescVa->handle,
                         memDescVa->childHandle,
                         0,
                         length,
                         mapFlags,
                         &mappedVa);
    if (status != NV_OK)
        return status;

    NV_ASSERT(memDescVa->address == mappedVa);

    *gpuOffset = memDescVa->address;

    return NV_OK;
}

//
// This function provides a GPU virtual address to a physical region
// that can either be in sysmem or vidmem.
//
static NV_STATUS nvGpuOpsGpuMalloc(struct gpuAddressSpace *vaSpace,
                                   NvBool isSystemMemory,
                                   NvLength length,
                                   NvU64 *gpuOffset,
                                   struct allocFlags flags,
                                   gpuAllocInfo *allocInfo)
{
    NV_STATUS status;
    NvU64 vaOffset = 0;
    NvU64 paOffset = 0;
    gpuVaAllocInfo vaAllocInfo = { 0 };
    NvHandle paMemDescHandle;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(allocInfo);
    NV_ASSERT(vaSpace);
    NV_ASSERT(gpuOffset);

    // Allocate physical memory first so that the PA can be associated with the
    // memDesc of the VA. This simplifies tracking of VA and PA handles.
    status = nvGpuOpsAllocPhysical(vaSpace->device, isSystemMemory, length,
                                   &paOffset, allocInfo);
    if (status != NV_OK)
        return status;

    NV_ASSERT(allocInfo->hPhysHandle);

    paMemDescHandle = allocInfo->hPhysHandle;
    vaAllocInfo.pageSize = allocInfo->pageSize;
    vaAllocInfo.alignment = allocInfo->alignment;

    status = nvGpuOpsAllocVirtual(vaSpace, length, &vaOffset, paMemDescHandle,
                                  flags, &vaAllocInfo);
    if (status != NV_OK)
        goto cleanup_physical;

    status = nvGpuOpsMapGpuMemory(vaSpace, vaOffset, length,
                                  allocInfo->pageSize, gpuOffset, flags);
    if (status != NV_OK)
        goto cleanup_virtual;

    return NV_OK;

cleanup_virtual:
    nvGpuOpsFreeVirtual(vaSpace, vaOffset);
cleanup_physical:
    pRmApi->Free(pRmApi, vaSpace->device->session->handle, paMemDescHandle);
    return status;
}
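
//
// Cleanup note: allocations made through nvGpuOpsGpuMalloc() are returned by
// GPU VA, and nvGpuOpsMemoryFree() (further below) frees both the physical
// and virtual handles by looking the VA up in vaSpace->allocations. A minimal
// round trip (sketch, assuming a valid vaSpace):
//
//     NvU64 gpuVa = 0;
//     gpuAllocInfo info = {0};
//     struct allocFlags flags = {0};
//
//     status = nvGpuOpsGpuMalloc(vaSpace, NV_TRUE /* sysmem */, 4096,
//                                &gpuVa, flags, &info);
//     ...
//     nvGpuOpsMemoryFree(vaSpace, gpuVa);
//
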
//
// This function is generic and could be used outside Confidential Computing
// (CC) as well. However, as of today its only caller is under CC checks, so
// it is protected by the same checks; otherwise builds would fail.
//
static void nvGpuOpsUnmapGpuMemory(struct gpuAddressSpace *vaSpace,
                                   NvU64 gpuOffset)
{
    gpuMemDesc *memDescVa = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(vaSpace != NULL);

    portSyncRwLockAcquireRead(vaSpace->allocationsLock);
    NV_ASSERT_OK(findDescriptor(vaSpace->allocations, gpuOffset, (void**)&memDescVa));
    portSyncRwLockReleaseRead(vaSpace->allocationsLock);

    NV_ASSERT(memDescVa != NULL);
    NV_ASSERT(memDescVa->handle != 0);
    NV_ASSERT(memDescVa->childHandle != 0);
    NV_ASSERT(memDescVa->address == gpuOffset);

    NV_ASSERT_OK(pRmApi->Unmap(pRmApi,
                               vaSpace->device->session->handle,
                               vaSpace->device->handle,
                               memDescVa->handle,
                               memDescVa->childHandle,
                               NV04_MAP_MEMORY_FLAGS_NONE,
                               gpuOffset));

    return;
}

static void nvGpuOpsFreeVirtual(struct gpuAddressSpace *vaSpace, NvU64 vaOffset)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    gpuMemDesc *memDescVa = NULL;
    portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
    deleteDescriptor(&vaSpace->allocations, vaOffset, (void**)&memDescVa);
    portSyncRwLockReleaseWrite(vaSpace->allocationsLock);
    NV_ASSERT(memDescVa);
    pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDescVa->handle);
    portMemFree(memDescVa);
}

NV_STATUS nvGpuOpsMemoryAllocFb(struct gpuAddressSpace *vaSpace,
                                NvLength length,
                                NvU64 *gpuOffset,
                                gpuAllocInfo *allocInfo)
{
    gpuAllocInfo allocInfoTemp = {0};
    gpuAllocInfo *pAllocInfo;
    struct allocFlags flags = {0};

    if (!vaSpace || !gpuOffset)
        return NV_ERR_INVALID_ARGUMENT;

    // Use default settings if the user hasn't provided any.
    if (allocInfo == NULL)
    {
        pAllocInfo = &allocInfoTemp;
    }
    else
    {
        pAllocInfo = allocInfo;
    }

    return nvGpuOpsGpuMalloc(vaSpace, NV_FALSE, length, gpuOffset, flags,
                             pAllocInfo);
}

NV_STATUS nvGpuOpsMemoryAllocSys(struct gpuAddressSpace *vaSpace,
                                 NvLength length,
                                 NvU64 *gpuOffset,
                                 gpuAllocInfo *allocInfo)
{
    gpuAllocInfo allocInfoTemp = {0};
    gpuAllocInfo *pAllocInfo;
    struct allocFlags flags = {0};

    if (!vaSpace || !gpuOffset)
        return NV_ERR_INVALID_ARGUMENT;

    // Use default settings if the user hasn't provided any.
    if (allocInfo == NULL)
    {
        pAllocInfo = &allocInfoTemp;
    }
    else
    {
        pAllocInfo = allocInfo;
    }

    return nvGpuOpsGpuMalloc(vaSpace, NV_TRUE, length, gpuOffset, flags,
                             pAllocInfo);
}
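
//
// nvGpuOpsMemoryReopen() below imports an existing physical allocation from
// another RM client: it dups the source object into this session, wraps it
// with a new virtual allocation, and maps it. The effective sequence is
// (sketch; error paths elided):
//
//     pRmApi->DupObject(...)       // take a reference on the source object
//     nvGpuOpsAllocVirtual(...)    // reserve GPU VA for the duped object
//     pRmApi->Map(...)             // commit the mapping
//
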
NV_STATUS nvGpuOpsMemoryReopen(struct gpuAddressSpace *vaSpace,
                               NvHandle hSrcClient,
                               NvHandle hSrcAllocation,
                               NvLength length,
                               NvU64 *gpuOffset)
{
    NV_STATUS status;
    NvHandle hAllocation = 0;
    gpuVaAllocInfo allocInfoTemp = { 0 };
    struct allocFlags flags = { 0 };
    NvU64 vaOffset;
    NvHandle hVirtual = 0;
    RsResourceRef *pResourceRef;
    NvU64 addressOffset = 0;
    NvHandle hParent;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    // Find the device type.
    // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;

    status = serverutilGetResourceRef(hSrcClient, hSrcAllocation, &pResourceRef);
    if (status != NV_OK)
    {
        rmapiLockRelease();
        return NV_ERR_OBJECT_NOT_FOUND;
    }

    if (!dynamicCast(pResourceRef->pResource, Memory))
    {
        rmapiLockRelease();
        return NV_ERR_INVALID_OBJECT_HANDLE;
    }

    hParent = pResourceRef->pParentRef ? pResourceRef->pParentRef->hResource : 0;

    status = serverutilGetResourceRef(hSrcClient, hParent, &pResourceRef);
    rmapiLockRelease();
    if (status != NV_OK || !dynamicCast(pResourceRef->pResource, Device))
        return NV_ERR_GENERIC;

    if (!vaSpace || !gpuOffset || !hSrcAllocation || !hSrcClient)
        return NV_ERR_INVALID_ARGUMENT;

    // Dup the physical memory object
    hAllocation = NV01_NULL_OBJECT;
    status = pRmApi->DupObject(pRmApi,
                               vaSpace->device->session->handle,
                               vaSpace->device->handle,
                               &hAllocation,
                               hSrcClient,
                               hSrcAllocation,
                               NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
    if (status != NV_OK)
        return status;

    // Associate the duped object with the newly created virtual memory object
    status = nvGpuOpsAllocVirtual(vaSpace, length, &vaOffset, hAllocation,
                                  flags, &allocInfoTemp);
    if (status != NV_OK)
        goto cleanup_dup;

    status = getHandleForVirtualAddr(vaSpace, vaOffset, NV_FALSE, &hVirtual);
    if (status != NV_OK)
        goto cleanup_virt_allocation;

    // Map the memory
    status = pRmApi->Map(pRmApi,
                         vaSpace->device->session->handle,
                         vaSpace->device->handle,
                         hVirtual,
                         hAllocation,
                         0,
                         length,
                         0,
                         &addressOffset);
    if (status != NV_OK)
        goto cleanup_virt_allocation;

    NV_ASSERT((vaOffset == addressOffset) && "nvGpuOpsMemoryReopen: VA offset Mismatch!");

    // Return the mapped GPU pointer
    *gpuOffset = vaOffset;

    return NV_OK;

cleanup_virt_allocation:
    nvGpuOpsFreeVirtual(vaSpace, vaOffset);
cleanup_dup:
    pRmApi->Free(pRmApi, vaSpace->device->session->handle, hAllocation);
    return status;
}

NV_STATUS nvGpuOpsPmaAllocPages(void *pPma, NvLength pageCount, NvU64 pageSize,
                                gpuPmaAllocationOptions *pPmaAllocOptions,
                                NvU64 *pPages)
{
    NV_STATUS status;
    gpuPmaAllocationOptions pmaAllocOptionsTemp = {0};
    gpuPmaAllocationOptions *pAllocInfo;
    THREAD_STATE_NODE threadState;

    if (!pPma || !pPages)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    // Use default settings if the user hasn't provided any.
    if (NULL == pPmaAllocOptions)
    {
        pAllocInfo = &pmaAllocOptionsTemp;
    }
    else
    {
        pAllocInfo = pPmaAllocOptions;
    }

    // Invoke the PMA module to allocate pages.
    status = pmaAllocatePages((PMA *)pPma,
                              pageCount,
                              pageSize,
                              (PMA_ALLOCATION_OPTIONS *)pAllocInfo,
                              pPages);

    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
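
//
// The nvGpuOpsPma*() entry points below are thin wrappers over the PMA
// module. A typical lifecycle driven by the caller (sketch; the page size
// and zero flags are illustrative assumptions):
//
//     NvU64 page;
//     gpuPmaAllocationOptions opts = {0};
//
//     status = nvGpuOpsPmaAllocPages(pPma, 1, NV_GPU_SMALL_PAGESIZE, &opts, &page);
//     status = nvGpuOpsPmaPinPages(pPma, &page, 1, NV_GPU_SMALL_PAGESIZE, 0);
//     ...
//     status = nvGpuOpsPmaUnpinPages(pPma, &page, 1, NV_GPU_SMALL_PAGESIZE);
//     nvGpuOpsPmaFreePages(pPma, &page, 1, NV_GPU_SMALL_PAGESIZE, 0);
//
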
//
// When this API is called from UVM as part of PMA eviction, the thread state
// should have been initialized already, and recursive re-init needs to be
// skipped as it's not supported.
//
NV_STATUS nvGpuOpsPmaPinPages(void *pPma,
                              NvU64 *pPages,
                              NvLength pageCount,
                              NvU64 pageSize,
                              NvU32 flags)
{
    NV_STATUS status;
    THREAD_STATE_NODE threadState;
    NvBool pmaEvictionCall = (flags & UVM_PMA_CALLED_FROM_PMA_EVICTION) != 0;

    if (!pPma || !pPages)
        return NV_ERR_INVALID_ARGUMENT;

    if (!pmaEvictionCall)
        threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    // Invoke the PMA module to pin pages.
    status = pmaPinPages((PMA *)pPma, pPages, pageCount, pageSize);

    if (!pmaEvictionCall)
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsPmaUnpinPages(void *pPma,
                                NvU64 *pPages,
                                NvLength pageCount,
                                NvU64 pageSize)
{
    NV_STATUS status;
    THREAD_STATE_NODE threadState;

    // Validate the arguments before initializing the thread state so that the
    // early return cannot leave an initialized thread state behind.
    if (!pPma || !pPages)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    // Invoke the PMA module to unpin pages.
    status = pmaUnpinPages((PMA *)pPma, pPages, pageCount, pageSize);

    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

//
// When this API is called from UVM as part of PMA eviction, the thread state
// should have been initialized already, and recursive re-init needs to be
// skipped as it's not supported.
//
void nvGpuOpsPmaFreePages(void *pPma,
                          NvU64 *pPages,
                          NvLength pageCount,
                          NvU64 pageSize,
                          NvU32 flags)
{
    THREAD_STATE_NODE threadState;
    NvU32 pmaFreeFlag = ((flags & UVM_PMA_FREE_IS_ZERO) ? PMA_FREE_SKIP_SCRUB : 0);
    NvBool pmaEvictionCall = (flags & UVM_PMA_CALLED_FROM_PMA_EVICTION) != 0;

    // Validate the arguments before initializing the thread state so that the
    // early return cannot leave an initialized thread state behind.
    if (!pPma || !pPages)
        return;

    if (!pmaEvictionCall)
        threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    // Invoke the PMA module to free pages.
    if (flags & UVM_PMA_ALLOCATE_CONTIGUOUS)
        pmaFreePages((PMA *)pPma, pPages, 1, pageCount * pageSize, pmaFreeFlag);
    else
        pmaFreePages((PMA *)pPma, pPages, pageCount, pageSize, pmaFreeFlag);

    if (!pmaEvictionCall)
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
}

static NV_STATUS nvGpuOpsChannelGetHwChannelId(struct gpuChannel *channel,
                                               NvU32 *hwChannelId)
{
    NV0080_CTRL_FIFO_GET_CHANNELLIST_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    params.numChannels = 1;
    params.pChannelHandleList = NV_PTR_TO_NvP64(&channel->channelHandle);
    params.pChannelList = NV_PTR_TO_NvP64(hwChannelId);

    return pRmApi->Control(pRmApi,
                           channel->tsg->vaSpace->device->session->handle,
                           channel->tsg->vaSpace->device->handle,
                           NV0080_CTRL_CMD_FIFO_GET_CHANNELLIST,
                           &params,
                           sizeof(params));
}

static void gpuDeviceUnmapCpuFreeHandle(struct gpuDevice *device,
                                        NvHandle handle,
                                        void *ptr,
                                        NvU32 flags)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    struct gpuSession *session = device->session;

    // Unmap the pointer
    if (ptr)
    {
        NV_STATUS status;
        const NvU32 pid = osGetCurrentProcess();

        status = pRmApi->UnmapFromCpu(pRmApi, session->handle, device->subhandle, handle, ptr, flags, pid);
        NV_ASSERT(status == NV_OK);
    }

    // Free the handle
    if (handle)
        pRmApi->Free(pRmApi, session->handle, handle);
}

static void gpuDeviceDestroyUsermodeRegion(struct gpuDevice *device)
{
    subDeviceDesc *rmSubDevice = device->rmSubDevice;

    gpuDeviceUnmapCpuFreeHandle(device,
                                rmSubDevice->clientRegionHandle,
                                (void *)rmSubDevice->clientRegionMapping,
                                0);
}

static NV_STATUS gpuDeviceMapUsermodeRegion(struct gpuDevice *device)
{
    NV_STATUS status = NV_OK;
    NvHandle regionHandle = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    struct gpuSession *session = device->session;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    NvU32 usermodeClass = VOLTA_USERMODE_A;
    void *pParams = NULL;
    NvU32 paramsSize = 0;
    NV_HOPPER_USERMODE_A_PARAMS hopperParams =
    {
        .bBar1Mapping = NV_TRUE,
        .bPriv = NV_FALSE
    };

    if (isDeviceHopperPlus(device))
    {
        usermodeClass = HOPPER_USERMODE_A;
        pParams = &hopperParams;
        paramsSize = sizeof(hopperParams);
    }

    NV_ASSERT(isDeviceVoltaPlus(device));
    NV_ASSERT(rmSubDevice->clientRegionHandle == 0 && rmSubDevice->clientRegionMapping == NULL);

    regionHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->subhandle,
                           &regionHandle,
                           usermodeClass,
                           pParams,
                           paramsSize);

    if (NV_OK != status)
        return status;

    status = pRmApi->MapToCpu(pRmApi,
                              session->handle,
                              device->subhandle,
                              regionHandle,
                              0,
                              NVC361_NV_USERMODE__SIZE,
                              (void **)(&rmSubDevice->clientRegionMapping),
                              DRF_DEF(OS33, _FLAGS, _ACCESS, _WRITE_ONLY));
    if (NV_OK != status)
        goto failure_case;

    rmSubDevice->clientRegionHandle = regionHandle;
    return status;

failure_case:
    pRmApi->Free(pRmApi, device->session->handle, regionHandle);
    return status;
}
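
//
// gpuDeviceMapUsermodeRegion() above picks the usermode class by
// architecture: VOLTA_USERMODE_A takes no parameters, while HOPPER_USERMODE_A
// takes NV_HOPPER_USERMODE_A_PARAMS to request a BAR1 mapping without
// privileged access, as configured above:
//
//     NV_HOPPER_USERMODE_A_PARAMS hopperParams =
//     {
//         .bBar1Mapping = NV_TRUE,
//         .bPriv        = NV_FALSE
//     };
//
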
//
// On Volta+, a channel can submit work by "ringing a doorbell" on the GPU after
// updating GP_PUT. The doorbell is a register mapped in the client's address
// space and can be shared by all channels in that address space. Each channel
// writes a channel-specific token to the doorbell to trigger the work.
//
static NV_STATUS nvGpuOpsGetWorkSubmissionInfo(struct gpuAddressSpace *vaSpace,
                                               struct gpuChannel *channel)
{
    NV_STATUS status = NV_OK;
    NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    struct gpuDevice *device = vaSpace->device;
    struct gpuSession *session = device->session;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;

    // Only valid for VOLTA+ (sub)Devices.
    NV_ASSERT(isDeviceVoltaPlus(vaSpace->device));

    // Now get the token for submission on the given channel.
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             channel->channelHandle,
                             NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN,
                             &params,
                             sizeof(params));
    if (status != NV_OK)
        return status;

    channel->workSubmissionOffset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_NOTIFY_CHANNEL_PENDING);
    channel->workSubmissionToken = params.workSubmitToken;

    //
    // pWorkSubmissionToken cannot be NULL even if errorNotifier is NULL.
    // errorNotifier was already checked for NULL, so an assert is
    // sufficient here.
    //
    NV_ASSERT_OR_RETURN((channel->errorNotifier != NULL), NV_ERR_INVALID_POINTER);

    channel->pWorkSubmissionToken =
        (NvU32 *)((NvU8 *)channel->errorNotifier +
                  (NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN * sizeof(NvNotification)) +
                  NV_OFFSETOF(NvNotification, info32));

    {
        OBJGPU *pGpu;
        RsClient *pClient;
        Device *pDevice;

        status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
        if (status != NV_OK)
            return status;

        status = deviceGetByHandle(pClient, device->handle, &pDevice);
        if (status != NV_OK)
            return status;

        pGpu = GPU_RES_GET_GPU(pDevice);

        // Map the usermode region in the channel's vaspace
        if (gpuIsCCFeatureEnabled(pGpu))
        {
            NvU64 vaOffset = 0;
            NvU64 gpuOffset = 0;
            gpuVaAllocInfo vaAllocInfo = {0};
            struct allocFlags flags = {0};

            NV_ASSERT(isDeviceHopperPlus(device));

            status = nvGpuOpsAllocVirtual(vaSpace, NVC361_NV_USERMODE__SIZE, &vaOffset,
                                          rmSubDevice->clientRegionHandle,
                                          flags, &vaAllocInfo);
            if (status != NV_OK)
                return status;

            status = nvGpuOpsMapGpuMemory(vaSpace, vaOffset, NVC361_NV_USERMODE__SIZE,
                                          vaAllocInfo.pageSize, &gpuOffset, flags);
            if (status != NV_OK)
            {
                nvGpuOpsFreeVirtual(vaSpace, vaOffset);
                return status;
            }

            channel->bClientRegionGpuMappingNeeded = NV_TRUE;
            channel->clientRegionGpuAddr = gpuOffset;
        }
    }

    return status;
}
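
//
// With the token and doorbell offset captured above, work submission from the
// client reduces to a single store after GP_PUT has been updated (sketch of
// the expected usage; ordering/memory barriers are the caller's
// responsibility):
//
//     *channel->workSubmissionOffset = channel->workSubmissionToken;
//
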
static NvBool channelNeedsDummyAlloc(struct gpuChannel *channel)
{
    if (deviceNeedsDummyAlloc(channel->tsg->vaSpace->device))
    {
        return isDeviceHopperPlus(channel->tsg->vaSpace->device) ?
                   channel->gpPutLoc == UVM_BUFFER_LOCATION_VID :
                   channel->gpPutLoc == UVM_BUFFER_LOCATION_SYS;
    }

    return NV_FALSE;
}

static NV_STATUS channelRetainDummyAlloc(struct gpuChannel *channel, gpuChannelInfo *channelInfo)
{
    struct gpuAddressSpace *vaSpace = channel->tsg->vaSpace;
    NV_STATUS status;

    if (!channelNeedsDummyAlloc(channel))
        return NV_OK;

    status = nvGpuOpsVaSpaceRetainDummyAlloc(vaSpace);
    if (status != NV_OK)
        return status;

    channel->retainedDummyAlloc = NV_TRUE;
    channelInfo->dummyBar1Mapping = vaSpace->dummyGpuAlloc.cpuAddr;

    return NV_OK;
}

static void channelReleaseDummyAlloc(struct gpuChannel *channel)
{
    if (channel != NULL && channel->retainedDummyAlloc)
    {
        NV_ASSERT(channelNeedsDummyAlloc(channel));
        nvGpuOpsVaSpaceReleaseDummyAlloc(channel->tsg->vaSpace);
    }
}

static RM_ENGINE_TYPE tsgEngineType(const struct gpuTsg *tsg)
{
    NV_ASSERT(tsg->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE || tsg->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2);

    if (tsg->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2)
        return RM_ENGINE_TYPE_SEC2;
    else
        return RM_ENGINE_TYPE_COPY(tsg->engineIndex);
}
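
//
// channelAllocate() below drives the full channel bring-up: GPFIFO buffer,
// error notifier, USERD, and the HW channel object itself. The caller's side
// is a small parameter block (sketch; the field values are illustrative
// assumptions, only the fields referenced in this file are shown):
//
//     gpuChannelAllocParams params = {0};
//     params.numGpFifoEntries = NV_GPU_OPS_NUM_GPFIFO_ENTRIES_DEFAULT;
//     params.gpFifoLoc = UVM_BUFFER_LOCATION_DEFAULT;
//     params.gpPutLoc  = UVM_BUFFER_LOCATION_DEFAULT;
//
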
static NV_STATUS channelAllocate(const gpuTsgHandle tsg,
                                 const gpuChannelAllocParams *params,
                                 struct gpuChannel **channelHandle,
                                 gpuChannelInfo *channelInfo)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    struct gpuAddressSpace *vaSpace = NULL;
    struct gpuChannel *channel = NULL;
    struct gpuDevice *device = NULL;
    struct gpuSession *session = NULL;
    void *cpuMap = NULL;
    NvHandle hErrorNotifier;
    NvHandle hTsg;
    struct ChannelAllocInfo *pAllocInfo = NULL;
    void *gpfifoCtrl = NULL;
    PCLI_DMA_MAPPING_INFO pDmaMappingInfo = NULL;
    struct allocFlags flags = {0};
    OBJGPU *pGpu = NULL;
    KernelFifo *pKernelFifo = NULL;
    NvU32 pid = osGetCurrentProcess();
    NvU32 subdeviceInstance;
    UVM_BUFFER_LOCATION gpFifoLoc;
    UVM_BUFFER_LOCATION gpPutLoc;
    NvLength gpFifoSize, errorNotifierSize;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    RsClient *pClient;

    if (params->numGpFifoEntries == 0)
        return NV_ERR_INVALID_ARGUMENT;

    vaSpace = tsg->vaSpace;
    device = vaSpace->device;
    NV_ASSERT(device);
    session = device->session;
    NV_ASSERT(session);

    // Set location defaults
    gpFifoLoc = UVM_BUFFER_LOCATION_SYS;
    if (device->fbInfo.bZeroFb)
        gpPutLoc = UVM_BUFFER_LOCATION_SYS;
    else
        gpPutLoc = UVM_BUFFER_LOCATION_VID;

    if (isDeviceVoltaPlus(device))
    {
        if (params->gpFifoLoc > UVM_BUFFER_LOCATION_VID)
            return NV_ERR_INVALID_ARGUMENT;
        if (params->gpPutLoc > UVM_BUFFER_LOCATION_VID)
            return NV_ERR_INVALID_ARGUMENT;

        if (params->gpFifoLoc != UVM_BUFFER_LOCATION_DEFAULT)
            gpFifoLoc = params->gpFifoLoc;
        if (params->gpPutLoc != UVM_BUFFER_LOCATION_DEFAULT)
            gpPutLoc = params->gpPutLoc;
    }
    else
    {
        // The GPFIFO needs to be placed in sysmem on Pascal and
        // pre-Pascal devices (Bug 1750713)
        if (params->gpFifoLoc != UVM_BUFFER_LOCATION_DEFAULT || params->gpPutLoc != UVM_BUFFER_LOCATION_DEFAULT)
            return NV_ERR_INVALID_ARGUMENT;
    }

    // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    pAllocInfo = portMemAllocNonPaged(sizeof(*pAllocInfo));
    if (pAllocInfo == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(pAllocInfo, 0, sizeof(*pAllocInfo));

    subdeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);

    channel = portMemAllocNonPaged(sizeof(*channel));
    if (channel == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        goto cleanup_free_memory;
    }

    portMemSet(channel, 0, sizeof(*channel));

    channel->tsg = tsg;
    channel->fifoEntries = params->numGpFifoEntries;
    channel->gpFifoLoc = gpFifoLoc;
    channel->gpPutLoc = gpPutLoc;

    gpFifoSize = (NvLength)params->numGpFifoEntries * NVA06F_GP_ENTRY__SIZE;

    // If the allocation is in vidmem, ask RM to allocate persistent vidmem
    pAllocInfo->gpuAllocInfo.bPersistentVidmem = NV_TRUE;

    if (gpuIsCCorApmFeatureEnabled(pGpu))
    {
        // The GPFIFO can be placed in one of the following locations:
        // 1. Unprotected sysmem in case of both APM and HCC
        // 2. Unprotected vidmem in case of APM
        // 3. Protected vidmem in case of HCC
        if ((gpFifoLoc == UVM_BUFFER_LOCATION_SYS) || gpuIsApmFeatureEnabled(pGpu))
        {
            pAllocInfo->gpuAllocInfo.bUnprotected = NV_TRUE;
        }
        else
        {
            pAllocInfo->gpuAllocInfo.bUnprotected = NV_FALSE;
        }
    }

    // 1. Allocate the GPFIFO entries. Don't pass any special flags.
    flags.bGetKernelVA = NV_FALSE;
    status = nvGpuOpsGpuMalloc(vaSpace,
                               gpFifoLoc == UVM_BUFFER_LOCATION_SYS,
                               gpFifoSize,
                               &channel->gpFifo,
                               flags,
                               &pAllocInfo->gpuAllocInfo);
    if (status != NV_OK)
        goto cleanup_free_memory;

    // 2. Map the GPFIFO entries.
    // Skip this whenever HCC is enabled and the GPFIFO is in vidmem. CPU
    // access to vidmem is blocked in that scenario.
    if (!gpuIsCCFeatureEnabled(pGpu) || (gpFifoLoc == UVM_BUFFER_LOCATION_SYS))
    {
        status = nvGpuOpsMemoryCpuMap(vaSpace,
                                      channel->gpFifo,
                                      gpFifoSize,
                                      &cpuMap,
                                      PAGE_SIZE_DEFAULT);
        if (status != NV_OK)
            goto cleanup_free_gpfifo_entries;
    }

    channel->gpFifoEntries = (NvU64 *) cpuMap;

    //
    // 3. Allocate memory for the error notifier. Make the allocation
    // sufficiently large to also accommodate any other channel
    // notifiers, and request a kernel VA and CPU caching.
    //
    if (gpuIsCCorApmFeatureEnabled(pGpu))
    {
        // Put the notifier in unprotected sysmem
        pAllocInfo->gpuAllocInfo.bUnprotected = NV_TRUE;
    }

    flags.bGetKernelVA = NV_TRUE;
    errorNotifierSize = sizeof(NvNotification) *
                        NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1;
    status = nvGpuOpsGpuMalloc(vaSpace,
                               NV_TRUE,
                               errorNotifierSize,
                               &channel->errorNotifierOffset,
                               flags,
                               &pAllocInfo->gpuAllocInfo);
    if (status != NV_OK)
        goto cleanup_unmap_gpfifo_entries;

    NV_ASSERT(channel->errorNotifierOffset);

    status = getHandleForVirtualAddr(vaSpace,
                                     channel->errorNotifierOffset,
                                     NV_FALSE /*virtual*/,
                                     &hErrorNotifier);
    if (status != NV_OK)
        goto cleanup_free_virtual;

    // 4. Find and share the VA with the UVM driver

    // TODO: Acquired because CliGetDmaMappingInfo expects RMAPI lock. Necessary?
    status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_READ, session->handle, &pClient, 0,
                                   0, 0, &acquiredLocks);
    if (status != NV_OK)
        goto cleanup_free_virtual;

    if (!CliGetDmaMappingInfo(pClient,
                              device->handle,
                              hErrorNotifier,
                              channel->errorNotifierOffset,
                              gpumgrGetDeviceGpuMask(device->deviceInstance),
                              &pDmaMappingInfo))
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        status = NV_ERR_GENERIC;
        goto cleanup_free_virtual;
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);

    //
    // RM uses the parent subdevice index to fill the notifier on SYSMEM. So use the same.
    // NOTE: the same assumption does not hold for VIDMEM allocations.
    //
    channel->errorNotifier = (NvNotification*)pDmaMappingInfo->KernelVAddr[subdeviceInstance];
    if (!channel->errorNotifier)
    {
        status = NV_ERR_GENERIC;
        goto cleanup_free_virtual;
    }

    // Now allocate the channel
    pAllocInfo->gpFifoAllocParams.hObjectError = hErrorNotifier;
    status = getHandleForVirtualAddr(vaSpace,
                                     channel->gpFifo,
                                     NV_FALSE /*virtual*/,
                                     &pAllocInfo->gpFifoAllocParams.hObjectBuffer);
    if (status != NV_OK)
        goto cleanup_free_virtual;

    pAllocInfo->gpFifoAllocParams.gpFifoOffset = channel->gpFifo;
    pAllocInfo->gpFifoAllocParams.gpFifoEntries = channel->fifoEntries;

    if (isDeviceVoltaPlus(device))
    {
        if (gpuIsCCorApmFeatureEnabled(pGpu))
        {
            // All channels are allocated as secure when the Confidential
            // Computing feature is enabled.
            pAllocInfo->gpFifoAllocParams.flags = FLD_SET_DRF(OS04, _FLAGS, _CC_SECURE, _TRUE,
                                                              pAllocInfo->gpFifoAllocParams.flags);

            // USERD can be placed in one of the following locations:
            // 1. Unprotected sysmem in case of both APM and HCC
            // 2. Unprotected vidmem in case of APM
            // 3. Protected vidmem in case of HCC
            if ((gpPutLoc == UVM_BUFFER_LOCATION_SYS) || gpuIsApmFeatureEnabled(pGpu))
            {
                pAllocInfo->gpuAllocInfo.bUnprotected = NV_TRUE;
            }
            else
            {
                pAllocInfo->gpuAllocInfo.bUnprotected = NV_FALSE;
            }
        }

        flags.bGetKernelVA = NV_FALSE;
        status = nvGpuOpsGpuMalloc(vaSpace,
                                   gpPutLoc == UVM_BUFFER_LOCATION_SYS,
                                   sizeof(KeplerAControlGPFifo),
                                   &channel->userdGpuAddr,
                                   flags,
                                   &pAllocInfo->gpuAllocInfo);
        if (status != NV_OK)
            goto cleanup_free_virtual;

        channel->hUserdPhysHandle = pAllocInfo->gpuAllocInfo.hPhysHandle;

        SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
        pAllocInfo->gpFifoAllocParams.hUserdMemory[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] = channel->hUserdPhysHandle;
        pAllocInfo->gpFifoAllocParams.userdOffset[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] = 0;
        SLI_LOOP_END

        // Skip this whenever HCC is enabled and USERD is in vidmem. CPU access
        // to vidmem is blocked in that scenario.
        if (!gpuIsCCFeatureEnabled(pGpu) || (gpPutLoc == UVM_BUFFER_LOCATION_SYS))
        {
            status = nvGpuOpsMemoryCpuMap(vaSpace,
                                          channel->userdGpuAddr,
                                          sizeof(KeplerAControlGPFifo),
                                          &gpfifoCtrl,
                                          PAGE_SIZE_DEFAULT);
            if (status != NV_OK)
                goto cleanup_free_virtual;
        }
    }

    pAllocInfo->gpFifoAllocParams.engineType = gpuGetNv2080EngineType(tsgEngineType(channel->tsg));

    if (channel->tsg->isFakeTsg)
    {
        // The internal RM TSG requires a valid vaSpace object.
        pAllocInfo->gpFifoAllocParams.hVASpace = vaSpace->handle;

        // Not a TSG: the device handle parents the channel when the RM
        // internal TSG is used.
        hTsg = device->handle;
    }
    else
    {
        // If zero, the channel will attach to the TSG's address space.
        pAllocInfo->gpFifoAllocParams.hVASpace = NV01_NULL_OBJECT;
        hTsg = channel->tsg->tsgHandle;
    }

    channel->channelHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           hTsg,
                           &channel->channelHandle,
                           device->hostClass,
                           &pAllocInfo->gpFifoAllocParams,
                           sizeof(pAllocInfo->gpFifoAllocParams));
    if (status != NV_OK)
    {
        goto cleanup_free_virtual;
    }

    // Query the runlist ID. Since the channel object now exists, failures
    // from here on must free it, so jump to cleanup_free_channel.
    pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    status = kfifoEngineInfoXlate_HAL(pGpu,
                                      pKernelFifo,
                                      ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
                                      (NvU32)tsgEngineType(channel->tsg),
                                      ENGINE_INFO_TYPE_RUNLIST,
                                      &channel->hwRunlistId);
    if (status != NV_OK)
        goto cleanup_free_channel;

    // Query the channel ID
    status = nvGpuOpsChannelGetHwChannelId(channel, &channel->hwChannelId);
    if (status != NV_OK)
        goto cleanup_free_channel;

    // Map USERD (controlPage)
    if (!isDeviceVoltaPlus(device))
    {
        status = pRmApi->MapToCpu(pRmApi,
                                  session->handle,
                                  device->subhandle,
                                  channel->channelHandle,
                                  0,
                                  sizeof(KeplerAControlGPFifo),
                                  &gpfifoCtrl,
                                  0);
        if (status != NV_OK)
            goto cleanup_free_channel;
    }

    channel->controlPage = gpfifoCtrl;

    // We create a BAR1 pointer inside channelRetainDummyAlloc and issue reads
    // on it to push pending BAR1 writes to vidmem. With HCC, BAR1 access to
    // vidmem is blocked and hence there is no point in creating the pointer.
    if (!gpuIsCCFeatureEnabled(pGpu))
    {
        status = channelRetainDummyAlloc(channel, channelInfo);
        if (status != NV_OK)
            goto cleanup_free_controlpage;
    }

    // Allocate the SW method class for fault cancel
    if (isDevicePascalPlus(device) && (channel->tsg->engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2))
    {
        channel->hFaultCancelSwMethodClass = NV01_NULL_OBJECT;
        status = pRmApi->Alloc(pRmApi,
                               session->handle,
                               channel->channelHandle,
                               &channel->hFaultCancelSwMethodClass,
                               GP100_UVM_SW,
                               NULL,
                               0);
        if (status != NV_OK)
            goto cleanup_free_controlpage;
    }

    portMemFree(pAllocInfo);

    *channelHandle = channel;
    channelInfo->gpGet = (channel->controlPage != NULL) ? &channel->controlPage->GPGet : NULL;
    channelInfo->gpPut = (channel->controlPage != NULL) ? &channel->controlPage->GPPut : NULL;
    channelInfo->gpFifoEntries = channel->gpFifoEntries;
    channelInfo->channelClassNum = device->hostClass;
    channelInfo->numGpFifoEntries = channel->fifoEntries;
    channelInfo->errorNotifier = channel->errorNotifier;
    channelInfo->hwRunlistId = channel->hwRunlistId;
    channelInfo->hwChannelId = channel->hwChannelId;

    channelInfo->gpFifoGpuVa = channel->gpFifo;
    channelInfo->gpPutGpuVa = channel->userdGpuAddr + NV_OFFSETOF(KeplerAControlGPFifo, GPPut);
    channelInfo->gpGetGpuVa = channel->userdGpuAddr + NV_OFFSETOF(KeplerAControlGPFifo, GPGet);

    return NV_OK;

cleanup_free_controlpage:
    if (!isDeviceVoltaPlus(device) && (gpfifoCtrl != NULL))
        pRmApi->UnmapFromCpu(pRmApi, session->handle, device->subhandle, channel->channelHandle, gpfifoCtrl, 0, pid);
cleanup_free_channel:
    pRmApi->Free(pRmApi, session->handle, channel->channelHandle);
cleanup_free_virtual:
    if (isDeviceVoltaPlus(device))
    {
        if (gpfifoCtrl != NULL)
            nvGpuOpsMemoryCpuUnMap(vaSpace, gpfifoCtrl);

        if (channel->userdGpuAddr != 0)
            nvGpuOpsMemoryFree(vaSpace, channel->userdGpuAddr);
    }

    nvGpuOpsMemoryFree(vaSpace, channel->errorNotifierOffset);
cleanup_unmap_gpfifo_entries:
    nvGpuOpsMemoryCpuUnMap(vaSpace, channel->gpFifoEntries);
cleanup_free_gpfifo_entries:
    nvGpuOpsMemoryFree(vaSpace, channel->gpFifo);
cleanup_free_memory:
    channelReleaseDummyAlloc(channel);
    portMemFree(channel);
    portMemFree(pAllocInfo);

    return status;
}
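
//
// engineAllocate() below binds a CE or SEC2 engine object to the channel. For
// CE, the allocation parameters name the engine instance explicitly (values
// taken from the code that follows):
//
//     NVB0B5_ALLOCATION_PARAMETERS ceAllocParams = {0};
//     ceAllocParams.version    = NVB0B5_ALLOCATION_PARAMETERS_VERSION_1;
//     ceAllocParams.engineType = NV2080_ENGINE_TYPE_COPY(engineIndex);
//
// SEC2 channels pass no parameters and use the device's sec2Class instead.
//
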
static NV_STATUS engineAllocate(struct gpuChannel *channel, gpuChannelInfo *channelInfo, UVM_GPU_CHANNEL_ENGINE_TYPE engineType)
{
    NV_STATUS status = NV_OK;
    NVB0B5_ALLOCATION_PARAMETERS ceAllocParams = {0};
    NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS channelGrpParams = {0};
    struct gpuAddressSpace *vaSpace = NULL;
    struct gpuDevice *device = NULL;
    struct gpuSession *session = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NvU32 class;
    NvU32 paramsSize;
    void *params;

    NV_ASSERT(channel);
    NV_ASSERT(channelInfo);
    NV_ASSERT(engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE || engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2);

    vaSpace = channel->tsg->vaSpace;
    NV_ASSERT(vaSpace);
    device = vaSpace->device;
    NV_ASSERT(device);
    session = device->session;
    NV_ASSERT(session);

    if (engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE)
    {
        ceAllocParams.version = NVB0B5_ALLOCATION_PARAMETERS_VERSION_1;
        ceAllocParams.engineType = NV2080_ENGINE_TYPE_COPY(channel->tsg->engineIndex);
        params = &ceAllocParams;
        paramsSize = sizeof(ceAllocParams);
        class = device->ceClass;
    }
    else
    {
        params = NULL;
        paramsSize = 0;
        class = device->sec2Class;
    }

    channel->engineHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi, session->handle,
                           channel->channelHandle,
                           &channel->engineHandle,
                           class,
                           params,
                           paramsSize);

    if (status != NV_OK)
        return status;

    // On Volta+ GPUs, the channel has a work submission offset that is used
    // as a doorbell.
    if (isDeviceVoltaPlus(device))
    {
        status = nvGpuOpsGetWorkSubmissionInfo(vaSpace, channel);
        if (status != NV_OK)
            goto cleanup_free_engine;

        channelInfo->workSubmissionOffset = channel->workSubmissionOffset;
        channelInfo->workSubmissionToken = channel->workSubmissionToken;
        channelInfo->pWorkSubmissionToken = channel->pWorkSubmissionToken;
        if (channel->bClientRegionGpuMappingNeeded)
        {
            channelInfo->workSubmissionOffsetGpuVa = channel->clientRegionGpuAddr +
                                                     NVC361_NOTIFY_CHANNEL_PENDING;
        }
    }

    // Schedule the channel
    channelGrpParams.bEnable = NV_TRUE;
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             channel->channelHandle,
                             NVA06F_CTRL_CMD_GPFIFO_SCHEDULE,
                             &channelGrpParams,
                             sizeof(channelGrpParams));

    if (status != NV_OK)
        goto cleanup_free_engine;

    return NV_OK;

cleanup_free_engine:
    pRmApi->Free(pRmApi, session->handle, channel->engineHandle);
    channel->engineHandle = NV01_NULL_OBJECT;
    return status;
}

NV_STATUS nvGpuOpsChannelAllocate(const gpuTsgHandle tsg,
                                  const gpuChannelAllocParams *params,
                                  struct gpuChannel **channelHandle,
                                  gpuChannelInfo *channelInfo)
{
    NV_STATUS status;
    UVM_GPU_CHANNEL_ENGINE_TYPE channelType;

    if (!tsg || !channelHandle || !params || !channelInfo)
        return NV_ERR_INVALID_ARGUMENT;

    channelType = tsg->engineType;
    NV_ASSERT(channelType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE || channelType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2);

    status = channelAllocate(tsg, params, channelHandle, channelInfo);
    if (status != NV_OK)
        return status;

    status = engineAllocate(*channelHandle, channelInfo, channelType);
    if (status != NV_OK)
        goto cleanup_free_channel;

    return NV_OK;

cleanup_free_channel:
    nvGpuOpsChannelDestroy(*channelHandle);
    *channelHandle = NULL;
    return status;
}
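
//
// nvGpuOpsChannelAllocate()/nvGpuOpsChannelDestroy() form a symmetric pair:
// destroy unwinds every resource allocate set up, in reverse order. A minimal
// lifecycle (sketch, assuming a TSG previously allocated with
// nvGpuOpsTsgAllocate() and valid params):
//
//     struct gpuChannel *channel = NULL;
//     gpuChannelInfo info = {0};
//
//     status = nvGpuOpsChannelAllocate(tsg, &params, &channel, &info);
//     ...                              // submit work via info.gpPut etc.
//     nvGpuOpsChannelDestroy(channel);
//
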
void nvGpuOpsChannelDestroy(struct gpuChannel *channel)
{
    NvU32 pid = osGetCurrentProcess();
    struct gpuAddressSpace *vaSpace = NULL;
    struct gpuDevice *device = NULL;
    struct gpuSession *session = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!channel)
        return;

    NV_ASSERT(channel->tsg);
    vaSpace = channel->tsg->vaSpace;
    NV_ASSERT(vaSpace);
    device = vaSpace->device;
    NV_ASSERT(device);
    session = device->session;
    NV_ASSERT(session);

    // Destroy the engine associated with the channel.
    if (channel->engineHandle != NV01_NULL_OBJECT)
        pRmApi->Free(pRmApi, session->handle, channel->engineHandle);

    // Tear down the channel
    if (isDevicePascalPlus(device))
        pRmApi->Free(pRmApi, session->handle, channel->hFaultCancelSwMethodClass);

    if (isDeviceVoltaPlus(device))
    {
        nvGpuOpsMemoryCpuUnMap(vaSpace, (void *)channel->controlPage);
        nvGpuOpsMemoryFree(vaSpace, channel->userdGpuAddr);
    }
    else
    {
        pRmApi->UnmapFromCpu(pRmApi,
                             session->handle,
                             device->subhandle,
                             channel->channelHandle,
                             (void *)channel->controlPage,
                             0,
                             pid);
    }

    if (channel->bClientRegionGpuMappingNeeded)
    {
        NV_ASSERT(isDeviceHopperPlus(device));
        nvGpuOpsUnmapGpuMemory(vaSpace, channel->clientRegionGpuAddr);
        nvGpuOpsFreeVirtual(vaSpace, channel->clientRegionGpuAddr);
    }

    // Free the channel
    pRmApi->Free(pRmApi, session->handle, channel->channelHandle);

    nvGpuOpsMemoryFree(vaSpace, channel->errorNotifierOffset);

    nvGpuOpsMemoryCpuUnMap(vaSpace, channel->gpFifoEntries);

    nvGpuOpsMemoryFree(vaSpace, channel->gpFifo);

    channelReleaseDummyAlloc(channel);

    portMemFree(channel);
}

NV_STATUS nvGpuOpsTsgAllocate(struct gpuAddressSpace *vaSpace,
                              const gpuTsgAllocParams *params,
                              struct gpuTsg **tsgHandle)
{
    NV_STATUS status;
    struct gpuDevice *device = NULL;
    struct gpuSession *session = NULL;
    struct gpuTsg *tsg = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NV_CHANNEL_GROUP_ALLOCATION_PARAMETERS tsgParams = { 0 };
    UVM_GPU_CHANNEL_ENGINE_TYPE engineType;

    if (!vaSpace || !params || !tsgHandle)
        return NV_ERR_INVALID_ARGUMENT;

    engineType = params->engineType;

    if (engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_CE &&
        engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2)
        return NV_ERR_INVALID_ARGUMENT;

    tsg = portMemAllocNonPaged(sizeof(*tsg));
    if (tsg == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(tsg, 0, sizeof(*tsg));

    device = vaSpace->device;
    NV_ASSERT(device);
    session = device->session;
    NV_ASSERT(session);

    tsg->vaSpace = vaSpace;
    tsg->engineType = engineType;
    tsg->engineIndex = params->engineIndex;

    // TSG is supported for any engine type starting on Volta. Prior to Volta,
    // only GR/compute channels use TSGs. nvGpuOps only allocates channels/TSGs
    // for the CE and SEC2 engine types.
    tsg->isFakeTsg = !isDeviceVoltaPlus(device);
    tsg->tsgHandle = NV01_NULL_OBJECT;

    if (tsg->isFakeTsg)
    {
        *tsgHandle = tsg;
        return NV_OK;
    }

    tsgParams.hVASpace = vaSpace->handle;
    tsgParams.engineType = gpuGetNv2080EngineType(tsgEngineType(tsg));

    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->handle,
                           &tsg->tsgHandle,
                           KEPLER_CHANNEL_GROUP_A,
                           &tsgParams,
                           sizeof(tsgParams));
    if (status != NV_OK)
        goto cleanup_free_tsg;

    *tsgHandle = tsg;

    return NV_OK;

cleanup_free_tsg:
    portMemFree(tsg);

    return status;
}
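
//
// On pre-Volta devices nvGpuOpsTsgAllocate() returns a "fake" TSG: no RM
// object is created (tsg->tsgHandle stays NV01_NULL_OBJECT) and the channel
// is later parented to the device handle instead. nvGpuOpsTsgDestroy() below
// therefore only frees the RM object for real TSGs.
//
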
void nvGpuOpsTsgDestroy(struct gpuTsg *tsg)
{
    if (!tsg)
        return;

    // RM takes care of freeing its internal TSG in the channel destruction
    // path.
    if (!tsg->isFakeTsg)
    {
        struct gpuAddressSpace *vaSpace = NULL;
        struct gpuDevice *device = NULL;
        struct gpuSession *session = NULL;
        RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

        vaSpace = tsg->vaSpace;
        NV_ASSERT(vaSpace);
        device = vaSpace->device;
        NV_ASSERT(device);
        session = device->session;
        NV_ASSERT(session);

        // Free the TSG
        pRmApi->Free(pRmApi, session->handle, tsg->tsgHandle);
    }

    portMemFree(tsg);
}

static NV_STATUS trackDescriptor(PNODE *pRoot, NvU64 key, void *desc)
{
    PNODE btreeNode;
    NV_ASSERT(desc);
    NV_ASSERT(pRoot);

    btreeNode = (PNODE)desc;

    btreeNode->keyStart = key;
    btreeNode->keyEnd = key;
    btreeNode->Data = desc;
    return btreeInsert(btreeNode, pRoot);
}

static NV_STATUS findDescriptor(PNODE pRoot, NvU64 key, void **desc)
{
    PNODE btreeNode = NULL;
    NV_STATUS status = NV_OK;

    NV_ASSERT(desc);

    status = btreeSearch(key, &btreeNode, pRoot);
    if (status != NV_OK)
        return status;

    *desc = btreeNode->Data;
    return NV_OK;
}

static NV_STATUS deleteDescriptor(PNODE *pRoot, NvU64 key, void **desc)
{
    PNODE btreeNode = NULL;
    NV_STATUS status = NV_OK;

    NV_ASSERT(desc);
    NV_ASSERT(pRoot);

    status = btreeSearch(key, &btreeNode, *pRoot);
    if (status != NV_OK)
        return status;

    *desc = btreeNode->Data;
    status = btreeUnlink(btreeNode, pRoot);
    return NV_OK;
}

static NV_STATUS destroyAllGpuMemDescriptors(NvHandle hClient, PNODE pNode)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    gpuMemDesc *memDesc = NULL;

    if (pNode == NULL)
        return NV_OK;

    destroyAllGpuMemDescriptors(hClient, pNode->left);
    destroyAllGpuMemDescriptors(hClient, pNode->right);

    memDesc = (gpuMemDesc*)pNode->Data;
    if (memDesc->childHandle)
        pRmApi->Free(pRmApi, hClient, memDesc->childHandle);

    if (memDesc->handle)
        pRmApi->Free(pRmApi, hClient, memDesc->handle);

    portMemFree(pNode->Data);

    return NV_OK;
}

// Returns the childHandle/handle of the VA memdesc associated with a VA.
static NV_STATUS getHandleForVirtualAddr(struct gpuAddressSpace *vaSpace,
                                         NvU64 allocationAddress,
                                         NvBool bPhysical,
                                         NvHandle *pHandle)
{
    NV_STATUS status = NV_OK;
    gpuMemDesc *memDesc = NULL;

    NV_ASSERT(vaSpace);
    NV_ASSERT(pHandle);

    portSyncRwLockAcquireRead(vaSpace->allocationsLock);
    status = findDescriptor(vaSpace->allocations, allocationAddress, (void**)&memDesc);
    portSyncRwLockReleaseRead(vaSpace->allocationsLock);
    if (status != NV_OK)
        return status;

    NV_ASSERT(memDesc);

    *pHandle = bPhysical ? memDesc->childHandle : memDesc->handle;

    if (!*pHandle)
        return NV_ERR_GENERIC;

    return NV_OK;
}

//
// Returns a CPU mapping to the provided GPU offset
//
NV_STATUS nvGpuOpsMemoryCpuMap(struct gpuAddressSpace *vaSpace,
                               NvU64 memory,
                               NvLength length,
                               void **cpuPtr,
                               NvU64 pageSize)
{
    gpuMemDesc *memDesc = NULL;
    cpuMappingDesc *cpuMapDesc = NULL;
    NV_STATUS status;
    void *pMappedAddr = NULL;
    NvP64 mappedAddr = 0;
    NvU32 flags = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!vaSpace || !cpuPtr)
        return NV_ERR_INVALID_ARGUMENT;

    cpuMapDesc = portMemAllocNonPaged(sizeof(*cpuMapDesc));
    if (cpuMapDesc == NULL)
        return NV_ERR_GENERIC;

    portSyncRwLockAcquireRead(vaSpace->allocationsLock);
    status = findDescriptor(vaSpace->allocations, memory, (void**)&memDesc);
    portSyncRwLockReleaseRead(vaSpace->allocationsLock);
    if (status != NV_OK)
        goto cleanup_desc;

    NV_ASSERT(memDesc);
    NV_ASSERT(memDesc->childHandle);

    //
    // Set the correct page size for BAR mappings.
    //
    if (pageSize == RM_PAGE_SIZE)
    {
        flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _4KB);
    }
    else if (pageSize == RM_PAGE_SIZE_HUGE)
    {
        // TODO: this flag is ignored, remove it once it is deprecated
        flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _HUGE);
    }
    else
    {
        flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _DEFAULT);
    }

    //
    // If the length passed in is zero, force the mapping to the size that
    // was used when the allocation was created.
    //
    status = pRmApi->MapToCpu(pRmApi,
                              vaSpace->device->session->handle,
                              vaSpace->device->subhandle,
                              memDesc->childHandle,
                              0,
                              length != 0 ? length : memDesc->size,
                              &pMappedAddr,
                              flags);
    if (status != NV_OK)
        goto cleanup_desc;

    mappedAddr = NV_PTR_TO_NvP64(pMappedAddr);

    cpuMapDesc->cpuPointer = (NvUPtr) mappedAddr;
    cpuMapDesc->handle = memDesc->childHandle;
    cpuMapDesc->btreeNode.keyStart = (NvU64)cpuMapDesc->cpuPointer;
    cpuMapDesc->btreeNode.keyEnd = (NvU64)cpuMapDesc->cpuPointer;
    cpuMapDesc->btreeNode.Data = (void *) cpuMapDesc;

    // Track the CPU memdesc
    portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
    status = btreeInsert(&cpuMapDesc->btreeNode, &vaSpace->cpuMappings);
    portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
    if (status != NV_OK)
        goto cleanup_desc;

    // The address can be used as the key because the BAR1 address space is unique.
    *cpuPtr = NvP64_VALUE(mappedAddr);

    return NV_OK;

cleanup_desc:
    portMemFree(cpuMapDesc);
    return status;
}

void nvGpuOpsMemoryCpuUnMap(struct gpuAddressSpace *vaSpace, void *cpuPtr)
{
    unsigned pid = 0;
    cpuMappingDesc *mappingDesc = NULL;
    PNODE btreeNode;
    NV_STATUS status = NV_OK;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!vaSpace || !cpuPtr)
        return;

    portSyncRwLockAcquireRead(vaSpace->cpuMappingsLock);
    status = btreeSearch((NvUPtr)cpuPtr, &btreeNode, vaSpace->cpuMappings);
    portSyncRwLockReleaseRead(vaSpace->cpuMappingsLock);
    if (status != NV_OK)
        return;

    mappingDesc = (cpuMappingDesc *)btreeNode->Data;
    if (mappingDesc)
    {
        pid = osGetCurrentProcess();
        status = pRmApi->UnmapFromCpu(pRmApi,
                                      vaSpace->device->session->handle,
                                      vaSpace->device->subhandle,
                                      mappingDesc->handle,
                                      NvP64_VALUE(((NvP64)mappingDesc->cpuPointer)),
                                      0,
                                      pid);
        NV_ASSERT(status == NV_OK);
    }

    portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
    btreeUnlink(btreeNode, &vaSpace->cpuMappings);
    portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);

    portMemFree(mappingDesc);
    return;
}
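
//
// CPU mapping lifecycle sketch (illustrative; assumes gpuVa came from
// nvGpuOpsGpuMalloc() on this vaSpace):
//
//     void *cpuPtr = NULL;
//
//     status = nvGpuOpsMemoryCpuMap(vaSpace, gpuVa, 0 /* whole alloc */,
//                                   &cpuPtr, PAGE_SIZE_DEFAULT);
//     ...
//     nvGpuOpsMemoryCpuUnMap(vaSpace, cpuPtr);
//
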
// This function frees both the physical and the virtual memory allocations.
// It is the counterpart of nvGpuOpsGpuMalloc.
void nvGpuOpsMemoryFree(struct gpuAddressSpace *vaSpace, NvU64 pointer)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    gpuMemDesc *memDesc = NULL;

    NV_ASSERT(vaSpace);

    portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
    deleteDescriptor(&vaSpace->allocations, pointer, (void**)&memDesc);
    portSyncRwLockReleaseWrite(vaSpace->allocationsLock);

    NV_ASSERT(memDesc);
    NV_ASSERT(memDesc->childHandle);
    NV_ASSERT(memDesc->handle);

    // Free the physical allocation
    pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->childHandle);

    // Free the virtual allocation
    pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->handle);

    portMemFree(memDesc);
}

NV_STATUS nvGpuOpsQueryCesCaps(struct gpuDevice *device,
                               gpuCesCaps *cesCaps)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;

    if (!device || !cesCaps)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      device->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    // Refresh CE information, which may have changed if a GPU has been
    // initialized by RM for the first time
    status = queryCopyEngines(device, cesCaps);
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsQueryCaps(struct gpuDevice *device, gpuCaps *caps)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS infoParams = {0};
    struct gpuSession *session = device->session;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, device->session->handle, NULL, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    infoParams.gpuId = device->gpuId;
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             session->handle,
                             NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2,
                             &infoParams,
                             sizeof(infoParams));
    if (status != NV_OK)
        goto cleanup;

    if (infoParams.numaId != NV0000_CTRL_NO_NUMA_NODE)
    {
        caps->numaEnabled = NV_TRUE;
        caps->numaNodeId = infoParams.numaId;
    }

cleanup:
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
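//
// Usage sketch (illustrative only): query device caps and read back the NUMA
// binding populated by nvGpuOpsQueryCaps() above. device is assumed to be an
// initialized struct gpuDevice pointer.
//
//     gpuCaps caps = {0};
//     if ((nvGpuOpsQueryCaps(device, &caps) == NV_OK) && caps.numaEnabled)
//     {
//         NV_PRINTF(LEVEL_INFO, "GPU attached to NUMA node %u\n", caps.numaNodeId);
//     }
//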
static NV_STATUS findVaspaceFromPid(unsigned pid, unsigned gpuId,
                                    NvHandle *hClient, NvHandle *hDevice,
                                    NvHandle *hSubdevice, NvHandle *hVaSpace)
{
    //
    // This function iterates over all the vaspace objects under each client
    // that matches the pid argument, and returns the first address space
    // that is tagged as UVM.
    //
    Device *pDevice = NULL;
    Subdevice *pSubDevice = NULL;
    OBJVASPACE *pVAS = NULL;
    OBJGPU *pGpu;
    unsigned hDeviceLocal = 0;
    unsigned hSubDeviceLocal = 0;
    NV_STATUS status;
    RmClient **ppClient;
    RmClient *pClient;
    RsClient *pRsClient;

    for (ppClient = serverutilGetFirstClientUnderLock();
         ppClient;
         ppClient = serverutilGetNextClientUnderLock(ppClient))
    {
        pClient = *ppClient;
        pRsClient = staticCast(pClient, RsClient);
        if (pClient->ProcID == pid)
        {
            pGpu = gpumgrGetGpuFromId(gpuId);
            if (!pGpu)
                return NV_ERR_INVALID_ARGUMENT;

            status = subdeviceGetByGpu(pRsClient, pGpu, &pSubDevice);
            if (status != NV_OK)
                continue;

            GPU_RES_SET_THREAD_BC_STATE(pSubDevice);

            status = deviceGetByGpu(pRsClient, pGpu, NV_TRUE, &pDevice);
            if (status == NV_OK)
            {
                hDeviceLocal = RES_GET_HANDLE(pDevice);

                if (pSubDevice != NULL)
                    hSubDeviceLocal = RES_GET_HANDLE(pSubDevice);

                *hClient = pRsClient->hClient;
                *hDevice = hDeviceLocal;
                *hSubdevice = hSubDeviceLocal;

                if (pDevice->vaMode !=
                    NV_DEVICE_ALLOCATION_VAMODE_MULTIPLE_VASPACES)
                {
                    status = vaspaceGetByHandleOrDeviceDefault(pRsClient, hDeviceLocal, 0, &pVAS);
                    if ((status != NV_OK) || (pVAS == NULL))
                        return NV_ERR_GENERIC;

                    //
                    // TODO: Bug 1632484:
                    // Check to see if pVAS is UVM_MANAGED, once
                    // that vaspace property is introduced.
                    // No need to check FaultCapable.
                    //
                    if ((vaspaceIsMirrored(pVAS)) ||
                        (vaspaceIsFaultCapable(pVAS)))
                    {
                        //
                        // This means that this client is
                        // using the vaspace associated to its device
                        //
                        *hVaSpace = 0;
                        return NV_OK;
                    }
                }

                //
                // If the default VA space is not tagged as UVM, search all
                // vaspace objects under this client for this device and
                // return the first one that is tagged as UVM.
                //
                if (findUvmAddressSpace(*hClient, pGpu->gpuInstance, hVaSpace, &pVAS) == NV_OK)
                {
                    return NV_OK;
                }
            }
        }
    }
    return NV_ERR_GENERIC;
}

//
// This function looks through all the vaspaces under a client for a device
// and returns the one that is tagged as UVM, or NULL if there is no UVM
// vaspace.
//
static NV_STATUS findUvmAddressSpace(NvHandle hClient, NvU32 gpuInstance, NvHandle *phVaSpace, OBJVASPACE **ppVASpace)
{
    RsResourceRef *pResourceRef;
    RS_ITERATOR iter;
    NvU32 gpuMask = NVBIT(gpuInstance);

    iter = serverutilRefIter(hClient, NV01_NULL_OBJECT, classId(VaSpaceApi), RS_ITERATE_DESCENDANTS, NV_TRUE);

    while (clientRefIterNext(iter.pClient, &iter))
    {
        pResourceRef = iter.pResourceRef;

        *ppVASpace = dynamicCast(pResourceRef->pResource, VaSpaceApi)->pVASpace;
        *phVaSpace = pResourceRef->hResource;

        if ((vaspaceIsMirrored(*ppVASpace) || vaspaceIsExternallyOwned(*ppVASpace)) &&
            (((*ppVASpace)->gpuMask & gpuMask) == gpuMask))
        {
            return NV_OK;
        }
    }
    *phVaSpace = 0;
    *ppVASpace = NULL;
    return NV_ERR_INVALID_ARGUMENT;
}

// Make sure UVM_GPU_NAME_LENGTH has the same length as
// NV2080_GPU_MAX_NAME_STRING_LENGTH.
ct_assert(NV2080_GPU_MAX_NAME_STRING_LENGTH == UVM_GPU_NAME_LENGTH);

static void getGpcTpcInfo(OBJGPU *pGpu, gpuInfo *pGpuInfo)
{
    KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);

    pGpuInfo->maxTpcPerGpcCount = 0;
    pGpuInfo->maxGpcCount = 0;
    pGpuInfo->gpcCount = 0;
    pGpuInfo->tpcCount = 0;

    NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsManager->legacyKgraphicsStaticInfo.bInitialized);
    NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo != NULL);

    pGpuInfo->maxTpcPerGpcCount =
        pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_TPC_PER_GPC].data;
    pGpuInfo->maxGpcCount =
        pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_GPCS].data;
    pGpuInfo->gpcCount =
        nvPopCount32(pKernelGraphicsManager->legacyKgraphicsStaticInfo.floorsweepingMasks.gpcMask);

    //
    // When MIG GPU partitioning is enabled, compute the upper bound on the number
    // of TPCs that may be available in this partition, to enable UVM to
    // conservatively size relevant data structures.
    //
    if (IS_MIG_IN_USE(pGpu))
    {
        pGpuInfo->tpcCount = pGpuInfo->gpcCount * pGpuInfo->maxTpcPerGpcCount;
    }
    else
    {
        KernelGraphics *pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, 0);
        const KGRAPHICS_STATIC_INFO *pKernelGraphicsStaticInfo = kgraphicsGetStaticInfo(pGpu, pKernelGraphics);

        NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsStaticInfo != NULL);
        pGpuInfo->tpcCount = pKernelGraphicsStaticInfo->pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_SHADER_PIPE_SUB_COUNT].data;
    }
}

static NV_STATUS queryVirtMode(NvHandle hClient, NvHandle hDevice, NvU32 *virtMode)
{
    NV_STATUS status = NV_OK;
    *virtMode = UVM_VIRT_MODE_NONE;
    return status;
}

static NV_STATUS
nvGpuOpsQueryGpuConfidentialComputeCaps(NvHandle hClient,
                                        UvmGpuConfComputeCaps *pGpuConfComputeCaps)
{
    NV_CONFIDENTIAL_COMPUTE_ALLOC_PARAMS confComputeAllocParams = {0};
    NV_CONF_COMPUTE_CTRL_CMD_SYSTEM_GET_CAPABILITIES_PARAMS confComputeParams = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NvHandle hConfCompute = 0;
    NV_STATUS status = NV_OK;

    confComputeAllocParams.hClient = hClient;
    status = pRmApi->Alloc(pRmApi,
                           hClient,
                           hClient,
                           &hConfCompute,
                           NV_CONFIDENTIAL_COMPUTE,
                           &confComputeAllocParams,
                           sizeof(confComputeAllocParams));
    if (status == NV_ERR_INVALID_CLASS)
    {
        pGpuConfComputeCaps->mode = UVM_GPU_CONF_COMPUTE_MODE_NONE;
        return NV_OK;
    }
    else
    {
        NV_ASSERT_OK_OR_RETURN(status);
    }

    NV_ASSERT_OK_OR_GOTO(status,
                         pRmApi->Control(pRmApi,
                                         hClient,
                                         hConfCompute,
                                         NV_CONF_COMPUTE_CTRL_CMD_SYSTEM_GET_CAPABILITIES,
                                         &confComputeParams,
                                         sizeof(confComputeParams)),
                         cleanup);

    if (confComputeParams.ccFeature == NV_CONF_COMPUTE_SYSTEM_FEATURE_APM_ENABLED)
    {
        pGpuConfComputeCaps->mode = UVM_GPU_CONF_COMPUTE_MODE_APM;
    }
    else if (confComputeParams.ccFeature == NV_CONF_COMPUTE_SYSTEM_FEATURE_HCC_ENABLED)
    {
        pGpuConfComputeCaps->mode = UVM_GPU_CONF_COMPUTE_MODE_HCC;
    }

cleanup:
    pRmApi->Free(pRmApi, hClient, hConfCompute);
    return status;
}
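//
// Usage sketch (illustrative only): probe the confidential compute mode. An
// NV_ERR_INVALID_CLASS failure of the NV_CONFIDENTIAL_COMPUTE allocation is
// treated above as "feature absent", so mode NONE is a valid success result.
// hClient is a hypothetical RM client handle.
//
//     UvmGpuConfComputeCaps ccCaps = {0};
//     if (nvGpuOpsQueryGpuConfidentialComputeCaps(hClient, &ccCaps) == NV_OK &&
//         ccCaps.mode == UVM_GPU_CONF_COMPUTE_MODE_HCC)
//     {
//         // Hopper CC: replayable faults arrive through an encrypted shadow
//         // buffer (see nvGpuOpsInitFaultInfo below).
//     }
//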
static NV_STATUS getSysmemLinkInfo(NvHandle hClient,
                                   NvHandle hSubDevice,
                                   gpuInfo *pGpuInfo)
{
    NvU32 sysmemConnType;
    NV2080_CTRL_BUS_GET_INFO_V2_PARAMS *busInfoParams;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NV_STATUS status;

    pGpuInfo->sysmemLink = UVM_LINK_TYPE_NONE;

    busInfoParams = portMemAllocNonPaged(sizeof(*busInfoParams));
    if (busInfoParams == NULL)
        return NV_ERR_INSUFFICIENT_RESOURCES;

    portMemSet(busInfoParams, 0, sizeof(*busInfoParams));
    busInfoParams->busInfoListSize = 1;
    busInfoParams->busInfoList[0].index = NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE;
    status = pRmApi->Control(pRmApi,
                             hClient,
                             hSubDevice,
                             NV2080_CTRL_CMD_BUS_GET_INFO_V2,
                             busInfoParams,
                             sizeof(*busInfoParams));
    sysmemConnType = busInfoParams->busInfoList[0].data;
    portMemFree(busInfoParams);

    if (status != NV_OK)
        return status;

    switch (sysmemConnType)
    {
        case NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_NVLINK:
        {
            NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus;
            NvU32 nvlinkVersion;
            NvBool atomicSupported;

            status = allocNvlinkStatus(hClient, hSubDevice, &nvlinkStatus);
            if (status != NV_OK)
                return status;

            nvlinkVersion = getNvlinkConnectionToNpu(nvlinkStatus,
                                                     &atomicSupported,
                                                     &pGpuInfo->sysmemLinkRateMBps);

            pGpuInfo->sysmemLink = rmControlToUvmNvlinkVersion(nvlinkVersion);

            portMemFree(nvlinkStatus);
            break;
        }
        case NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_C2C:
        {
            NvBool c2cConnectedToCpu = NV_FALSE;

            status = getC2CConnectionToCpu(hClient,
                                           hSubDevice,
                                           &c2cConnectedToCpu,
                                           &pGpuInfo->sysmemLinkRateMBps);
            if (status != NV_OK)
                return status;

            if (c2cConnectedToCpu == NV_FALSE)
            {
                NV_ASSERT(0);
                return NV_ERR_INVALID_STATE;
            }

            pGpuInfo->sysmemLink = UVM_LINK_TYPE_C2C;
            break;
        }
        case NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_PCIE:
        {
            pGpuInfo->sysmemLink = UVM_LINK_TYPE_PCIE;
            status = getPCIELinkRateMBps(hClient, hSubDevice, &pGpuInfo->sysmemLinkRateMBps);
            if (status != NV_OK)
                return status;
            break;
        }
        default:
        {
            NV_PRINTF(LEVEL_ERROR, "Unsupported sysmem connection type: %d\n",
                      sysmemConnType);
            break;
        }
    }

    NV_PRINTF(LEVEL_INFO, "sysmem link type: %d bw: %u\n", pGpuInfo->sysmemLink, pGpuInfo->sysmemLinkRateMBps);
    NV_ASSERT(pGpuInfo->sysmemLink != UVM_LINK_TYPE_NONE);
    return NV_OK;
}

static NV_STATUS getSystemMemoryWindow(OBJGPU *pGpu, gpuInfo *pGpuInfo)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);

    if (!pKernelMemorySystem)
        return NV_ERR_OBJECT_NOT_FOUND;

    if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
    {
        pGpuInfo->systemMemoryWindowStart = pKernelMemorySystem->coherentCpuFbBase;
        pGpuInfo->systemMemoryWindowSize = pKernelMemorySystem->coherentCpuFbEnd -
                                           pKernelMemorySystem->coherentCpuFbBase;
    }
    else
    {
        pGpuInfo->systemMemoryWindowStart = 0;
        pGpuInfo->systemMemoryWindowSize = 0;
    }

    return NV_OK;
}
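//
// Illustrative check (not part of the driver): with the window populated by
// getSystemMemoryWindow(), a caller can test whether a physical address lies
// inside the coherent CPU-FB aperture. gpuInfo and physAddr are hypothetical.
//
//     NvBool inWindow =
//         (gpuInfo.systemMemoryWindowSize != 0) &&
//         (physAddr >= gpuInfo.systemMemoryWindowStart) &&
//         (physAddr - gpuInfo.systemMemoryWindowStart < gpuInfo.systemMemoryWindowSize);
//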
static NV_STATUS getNvswitchInfo(OBJGPU *pGpu,
                                 NvHandle hClient,
                                 NvHandle hSubDevice,
                                 gpuInfo *pGpuInfo)
{
    NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus;
    NvU32 nvlinkVersion;
    NvU32 linkBandwidthMBps;
    NV_STATUS status;

    pGpuInfo->connectedToSwitch = NV_FALSE;

    status = allocNvlinkStatus(hClient, hSubDevice, &nvlinkStatus);
    if (status != NV_OK)
        return status;

    nvlinkVersion = getNvlinkConnectionToSwitch(nvlinkStatus, &linkBandwidthMBps);

    if (rmControlToUvmNvlinkVersion(nvlinkVersion) != UVM_LINK_TYPE_NONE)
    {
        KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);

        NV_ASSERT(rmControlToUvmNvlinkVersion(nvlinkVersion) != UVM_LINK_TYPE_NVLINK_1);
        pGpuInfo->connectedToSwitch = NV_TRUE;

        if (pKernelNvlink == NULL)
        {
            pGpuInfo->nvswitchMemoryWindowStart = NVLINK_INVALID_FABRIC_ADDR;
        }
        else
        {
            pGpuInfo->nvswitchMemoryWindowStart = knvlinkGetUniqueFabricBaseAddress(pGpu, pKernelNvlink);
        }
    }

    portMemFree(nvlinkStatus);

    return NV_OK;
}
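//
// Illustrative check (not part of the driver): getNvswitchInfo() reports an
// NVSwitch connection plus the GPU's fabric memory window; an invalid window
// start means the fabric address was unavailable. gpuInfo is hypothetical.
//
//     if (gpuInfo.connectedToSwitch &&
//         (gpuInfo.nvswitchMemoryWindowStart != NVLINK_INVALID_FABRIC_ADDR))
//     {
//         // peer GPU memory is reachable at fabric offset
//         // gpuInfo.nvswitchMemoryWindowStart
//     }
//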
NV_STATUS nvGpuOpsGetGpuInfo(const NvProcessorUuid *pUuid,
                             const gpuClientInfo *pGpuClientInfo,
                             gpuInfo *pGpuInfo)
{
    NV_STATUS status;
    NV0080_ALLOC_PARAMETERS nv0080AllocParams = {0};
    NV2080_ALLOC_PARAMETERS nv2080AllocParams = {0};
    NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
    NV2080_CTRL_MC_GET_ARCH_INFO_PARAMS archInfoParams = {0};
    NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS gpuNameParams = {0};
    NvHandle clientHandle = 0;
    NvHandle deviceHandle = 1;
    NvHandle subDeviceHandle = 2;
    NvBool isClientAllocated = NV_FALSE;
    NvBool isDeviceAllocated = NV_FALSE;
    NvBool isSubdeviceAllocated = NV_FALSE;
    NV0080_CTRL_GPU_GET_NUM_SUBDEVICES_PARAMS subDevParams = { 0 };
    NV2080_CTRL_GPU_GET_SIMULATION_INFO_PARAMS simulationInfoParams = {0};
    OBJGPU *pGpu = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NvU32 dummy;

    pGpu = gpumgrGetGpuFromUuid(pUuid->uuid,
                                DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE, _SHA1) |
                                DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT, _BINARY));
    if (!pGpu)
    {
        return NV_ERR_GPU_UUID_NOT_FOUND;
    }

    if (!osIsGpuAccessible(pGpu))
    {
        return NV_ERR_INSUFFICIENT_PERMISSIONS;
    }

    status = nvGpuOpsCreateClient(pRmApi, &clientHandle);
    if (status != NV_OK)
    {
        return status;
    }

    isClientAllocated = NV_TRUE;

    portMemCopy(&gpuIdInfoParams.gpuUuid, sizeof(*pUuid), pUuid, sizeof(*pUuid));

    gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
    status = pRmApi->Control(pRmApi,
                             clientHandle,
                             clientHandle,
                             NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
                             &gpuIdInfoParams,
                             sizeof(gpuIdInfoParams));
    if (NV_OK != status)
        goto cleanup;

    nv0080AllocParams.deviceId = gpuIdInfoParams.deviceInstance;

    status = pRmApi->Alloc(pRmApi,
                           clientHandle,
                           clientHandle,
                           &deviceHandle,
                           NV01_DEVICE_0,
                           &nv0080AllocParams,
                           sizeof(nv0080AllocParams));
    if (NV_OK != status)
        goto cleanup;

    isDeviceAllocated = NV_TRUE;

    nv2080AllocParams.subDeviceId = gpuIdInfoParams.subdeviceInstance;
    status = pRmApi->Alloc(pRmApi,
                           clientHandle,
                           deviceHandle,
                           &subDeviceHandle,
                           NV20_SUBDEVICE_0,
                           &nv2080AllocParams,
                           sizeof(nv2080AllocParams));
    if (NV_OK != status)
        goto cleanup;

    isSubdeviceAllocated = NV_TRUE;

    portMemCopy(&pGpuInfo->uuid, sizeof(*pUuid), pUuid, sizeof(*pUuid));

    status = pRmApi->Control(pRmApi,
                             clientHandle,
                             subDeviceHandle,
                             NV2080_CTRL_CMD_MC_GET_ARCH_INFO,
                             &archInfoParams,
                             sizeof(archInfoParams));
    if (NV_OK != status)
        goto cleanup;

    pGpuInfo->gpuArch = archInfoParams.architecture;
    pGpuInfo->gpuImplementation = archInfoParams.implementation;

    gpuNameParams.gpuNameStringFlags = NV2080_CTRL_GPU_GET_NAME_STRING_FLAGS_TYPE_ASCII;
    status = pRmApi->Control(pRmApi,
                             clientHandle,
                             subDeviceHandle,
                             NV2080_CTRL_CMD_GPU_GET_NAME_STRING,
                             &gpuNameParams,
                             sizeof(gpuNameParams));
    if (NV_OK != status)
        goto cleanup;

    portStringCopy(pGpuInfo->name, sizeof(pGpuInfo->name),
                   (const char *)gpuNameParams.gpuNameString.ascii,
                   sizeof(gpuNameParams.gpuNameString.ascii));

    status = queryVirtMode(clientHandle, deviceHandle, &pGpuInfo->virtMode);
    if (status != NV_OK)
        goto cleanup;

    pGpuInfo->gpuInTcc = NV_FALSE;

    status = findDeviceClasses(clientHandle,
                               deviceHandle,
                               subDeviceHandle,
                               &pGpuInfo->hostClass,
                               &pGpuInfo->ceClass,
                               &pGpuInfo->computeClass,
                               &dummy,
                               &dummy,
                               &dummy);
    if (status != NV_OK)
        goto cleanup;

    status = pRmApi->Control(pRmApi,
                             clientHandle,
                             deviceHandle,
                             NV0080_CTRL_CMD_GPU_GET_NUM_SUBDEVICES,
                             &subDevParams,
                             sizeof(subDevParams));
    if (status != NV_OK)
        goto cleanup;

    pGpuInfo->subdeviceCount = subDevParams.numSubDevices;

    getGpcTpcInfo(pGpu, pGpuInfo);

    if (IS_MIG_IN_USE(pGpu))
    {
        NvU32 swizzId;

        NV_ASSERT(pGpuInfo->subdeviceCount == 1);

        status = getSwizzIdFromUserSmcPartHandle(pRmApi,
                                                 clientHandle,
                                                 deviceHandle,
                                                 pGpuClientInfo->hClient,
                                                 pGpuClientInfo->hSmcPartRef,
                                                 &swizzId);
        if (status != NV_OK)
            goto cleanup;

        pGpuInfo->smcEnabled = NV_TRUE;
        pGpuInfo->smcSwizzId = swizzId;
        pGpuInfo->smcUserClientInfo.hClient = pGpuClientInfo->hClient;
        pGpuInfo->smcUserClientInfo.hSmcPartRef = pGpuClientInfo->hSmcPartRef;
    }

    status = pRmApi->Control(pRmApi,
                             clientHandle,
                             subDeviceHandle,
                             NV2080_CTRL_CMD_GPU_GET_SIMULATION_INFO,
                             &simulationInfoParams,
                             sizeof(simulationInfoParams));
    if (status != NV_OK)
        goto cleanup;

    pGpuInfo->isSimulated = (simulationInfoParams.type != NV2080_CTRL_GPU_GET_SIMULATION_INFO_TYPE_NONE);

    portMemSet(&pGpuInfo->gpuConfComputeCaps, 0, sizeof(pGpuInfo->gpuConfComputeCaps));

    status = nvGpuOpsQueryGpuConfidentialComputeCaps(clientHandle, &pGpuInfo->gpuConfComputeCaps);
    if (status != NV_OK)
        goto cleanup;

    status = getSysmemLinkInfo(clientHandle, subDeviceHandle, pGpuInfo);
    if (status != NV_OK)
        goto cleanup;

    status = getSystemMemoryWindow(pGpu, pGpuInfo);
    if (status != NV_OK)
        goto cleanup;

    status = getNvswitchInfo(pGpu, clientHandle, subDeviceHandle, pGpuInfo);
    if (status != NV_OK)
        goto cleanup;

cleanup:
    if (isSubdeviceAllocated)
        pRmApi->Free(pRmApi, clientHandle, subDeviceHandle);

    if (isDeviceAllocated)
        pRmApi->Free(pRmApi, clientHandle, deviceHandle);

    if (isClientAllocated)
        pRmApi->Free(pRmApi, clientHandle, clientHandle);

    return status;
}

NV_STATUS nvGpuOpsGetGpuIds(const NvU8 *pUuid,
                            unsigned uuidLength,
                            NvU32 *pDeviceId,
                            NvU32 *pSubdeviceId)
{
    NV_STATUS nvStatus;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
    NvHandle clientHandle = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    nvStatus = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks);
    if (nvStatus != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return nvStatus;
    }

    nvStatus = nvGpuOpsCreateClient(pRmApi, &clientHandle);
    if (nvStatus != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return nvStatus;
    }

    portMemCopy(&gpuIdInfoParams.gpuUuid, uuidLength, pUuid, uuidLength);

    gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
    nvStatus = pRmApi->Control(pRmApi,
                               clientHandle,
                               clientHandle,
                               NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
                               &gpuIdInfoParams,
                               sizeof(gpuIdInfoParams));
    if (NV_OK == nvStatus)
    {
        *pDeviceId = gpuIdInfoParams.deviceInstance;
        *pSubdeviceId = gpuIdInfoParams.subdeviceInstance;
    }

    pRmApi->Free(pRmApi, clientHandle, clientHandle);

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return nvStatus;
}

NV_STATUS nvGpuOpsServiceDeviceInterruptsRM(struct gpuDevice *device)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV2080_CTRL_MC_SERVICE_INTERRUPTS_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle, NULL, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    params.engines = NV2080_CTRL_MC_ENGINE_ID_ALL;
    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_MC_SERVICE_INTERRUPTS,
                             &params,
                             sizeof(params));

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
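//
// Usage sketch (illustrative only): resolve a raw UUID to RM instance indices
// with nvGpuOpsGetGpuIds() above. uuid is a hypothetical NV_GPU_UUID_LEN-byte
// array; the same UUID also feeds nvGpuOpsGetGpuInfo().
//
//     NvU32 deviceId, subdeviceId;
//     if (nvGpuOpsGetGpuIds(uuid, NV_GPU_UUID_LEN, &deviceId, &subdeviceId) == NV_OK)
//     {
//         // deviceId/subdeviceId identify the GPU for later allocations
//     }
//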
NV_STATUS nvGpuOpsCheckEccErrorSlowpath(struct gpuChannel *channel,
                                        NvBool *bEccDbeSet)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS eccStatus;
    NvU32 i = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!channel || !bEccDbeSet)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      channel->tsg->vaSpace->device->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    *bEccDbeSet = NV_FALSE;

    // Do anything only if ECC is enabled on this device
    if (channel->tsg->vaSpace->device->rmSubDevice->bEccEnabled)
    {
        portMemSet(&eccStatus, 0, sizeof(eccStatus));

        status = pRmApi->Control(pRmApi,
                                 channel->tsg->vaSpace->device->session->handle,
                                 channel->tsg->vaSpace->device->subhandle,
                                 NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS,
                                 &eccStatus,
                                 sizeof(eccStatus));
        if (status != NV_OK)
        {
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return NV_ERR_GENERIC;
        }

        for (i = 0; i < NV2080_CTRL_GPU_ECC_UNIT_COUNT; i++)
        {
            if (eccStatus.units[i].dbe.count != 0)
            {
                *bEccDbeSet = NV_TRUE;
            }
        }
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

static NV_STATUS nvGpuOpsFillGpuMemoryInfo(PMEMORY_DESCRIPTOR pMemDesc,
                                           OBJGPU *pMappingGpu,
                                           gpuMemoryInfo *pGpuMemoryInfo)
{
    NV_STATUS status;
    PMEMORY_DESCRIPTOR pRootMemDesc = memdescGetRootMemDesc(pMemDesc, NULL);
    OBJGPU *pGpu = (pMemDesc->pGpu == NULL) ? pMappingGpu : pMemDesc->pGpu;

    status = nvGpuOpsMemGetPageSize(pMappingGpu,
                                    pMemDesc,
                                    &pGpuMemoryInfo->pageSize);
    if (status != NV_OK)
        return status;

    pGpuMemoryInfo->size = memdescGetSize(pMemDesc);

    pGpuMemoryInfo->contig = memdescGetContiguity(pMemDesc, AT_GPU);

    if (pGpuMemoryInfo->contig)
    {
        GMMU_APERTURE aperture = nvGpuOpsGetExternalAllocAperture(pMemDesc, NV_FALSE, NV_FALSE);
        NvU64 physAddr;

        memdescGetPhysAddrsForGpu(pMemDesc, pMappingGpu, AT_GPU, 0, 0, 1, &physAddr);

        pGpuMemoryInfo->physAddr =
            kgmmuEncodePhysAddr(GPU_GET_KERNEL_GMMU(pGpu), aperture, physAddr, NVLINK_INVALID_FABRIC_ADDR);
    }

    pGpuMemoryInfo->kind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);

    pGpuMemoryInfo->sysmem = (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM);

    pGpuMemoryInfo->deviceDescendant = pRootMemDesc->pGpu != NULL;

    if (pGpuMemoryInfo->deviceDescendant)
    {
        NvU8 *uuid;
        NvU32 uuidLength, flags;
        NV_STATUS status;
        flags = DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE, _SHA1) |
                DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT, _BINARY);

        // On success, allocates memory for uuid
        status = gpuGetGidInfo(pGpu, &uuid, &uuidLength, flags);
        if (status != NV_OK)
            return status;

        portMemCopy(&pGpuMemoryInfo->uuid, uuidLength, uuid, uuidLength);
        portMemFree(uuid);
    }

    return NV_OK;
}

static NvBool memdescIsSysmem(PMEMORY_DESCRIPTOR pMemDesc)
{
    return (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) &&
           !(memdescGetFlag(pMemDesc, MEMDESC_FLAGS_MAP_SYSCOH_OVER_BAR1));
}
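//
// Usage sketch (illustrative only): poll nvGpuOpsCheckEccErrorSlowpath()
// above for double-bit errors after a suspicious fault. channel is a
// hypothetical struct gpuChannel pointer.
//
//     NvBool bDbe = NV_FALSE;
//     if ((nvGpuOpsCheckEccErrorSlowpath(channel, &bDbe) == NV_OK) && bDbe)
//     {
//         // an uncorrectable DBE was counted; treat the channel's work as lost
//     }
//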
static NV_STATUS dupMemory(struct gpuDevice *device,
                           NvHandle hClient,
                           NvHandle hPhysMemory,
                           NvU32 flags,
                           NvHandle *hDupMemory,
                           gpuMemoryInfo *pGpuMemoryInfo)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NvHandle dupedMemHandle;
    Memory *pMemory = NULL;
    PMEMORY_DESCRIPTOR pMemDesc = NULL;
    MEMORY_DESCRIPTOR *pAdjustedMemDesc = NULL;
    FABRIC_VASPACE *pFabricVAS = NULL;
    OBJGPU *pMappingGpu;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RsClient *pSessionClient;
    RsResourceRef *pResourceRef;
    RsResourceRef *pParentRef;
    Subdevice *pSubdevice;
    struct gpuSession *session;
    NvHandle hParent;
    NvBool bIsIndirectPeer = NV_FALSE;

    if (!device || !hDupMemory)
        return NV_ERR_INVALID_ARGUMENT;

    NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE));

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    // RS-TODO use dual client locking
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle,
                                      &pSessionClient, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = subdeviceGetByHandle(pSessionClient, device->subhandle, &pSubdevice);
    if (status != NV_OK)
        goto done;

    pMappingGpu = GPU_RES_GET_GPU(pSubdevice);

    GPU_RES_SET_THREAD_BC_STATE(pSubdevice);

    // Get all the necessary information about the memory
    status = nvGpuOpsGetMemoryByHandle(hClient,
                                       hPhysMemory,
                                       &pMemory);
    if (status != NV_OK)
        goto done;

    // RM client allocations can't have multiple memdescs.
    pMemDesc = pMemory->pMemDesc;
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));

    pAdjustedMemDesc = pMemDesc;
    pFabricVAS = dynamicCast(pMappingGpu->pFabricVAS, FABRIC_VASPACE);
    if (pFabricVAS != NULL)
    {
        status = fabricvaspaceGetGpaMemdesc(pFabricVAS, pMemDesc, pMappingGpu, &pAdjustedMemDesc);
        if (status != NV_OK)
            goto done;
    }

    if (memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FBMEM &&
        memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_SYSMEM &&
        memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FABRIC_MC &&
        memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FABRIC_V2)
    {
        status = NV_ERR_NOT_SUPPORTED;
        goto freeGpaMemdesc;
    }

    // For SYSMEM or indirect peer mappings
    bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu);
    if (bIsIndirectPeer ||
        memdescIsSysmem(pAdjustedMemDesc))
    {
        // For sysmem allocations, the dup done below is very shallow and in
        // particular doesn't create IOMMU mappings required for the mapped GPU
        // to access the memory. That's a problem if the mapped GPU is different
        // from the GPU that the allocation was created under. Add them
        // explicitly here and remove them when the memory is freed in
        // nvGpuOpsFreeDupedHandle(). Notably memdescMapIommu() refcounts the
        // mappings so it's ok to call it if the mappings are already there.
        //
        // TODO: Bug 1811060: Add native support for this use-case in RM API.
        status = memdescMapIommu(pAdjustedMemDesc, pMappingGpu->busInfo.iovaspaceId);
        if (status != NV_OK)
            goto freeGpaMemdesc;
    }

    session = device->session;

    if (pGpuMemoryInfo)
    {
        RsClient *pClient;
        status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
        if (status != NV_OK)
            goto freeGpaMemdesc;

        status = nvGpuOpsFillGpuMemoryInfo(pAdjustedMemDesc, pMappingGpu, pGpuMemoryInfo);
        if (status != NV_OK)
            goto freeGpaMemdesc;
    }

    pResourceRef = RES_GET_REF(pMemory);
    pParentRef = pResourceRef->pParentRef;

    // TODO: Bug 2479851: temporarily detect the type of the parent of the
    // memory object (device or subdevice). Once CUDA switches to subdevices,
    // we will use subdevice handles unconditionally, here.
    if (dynamicCast(pParentRef->pResource, Subdevice))
    {
        hParent = device->subhandle;
    }
    else if (dynamicCast(pParentRef->pResource, RsClientResource))
    {
        NvBool bAssert = (
            (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_MC) ||
            (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_V2));

        NV_ASSERT(bAssert);

        hParent = session->handle;
    }
    else
    {
        NV_ASSERT(dynamicCast(pParentRef->pResource, Device));
        hParent = device->handle;
    }

    dupedMemHandle = NV01_NULL_OBJECT;
    status = pRmApi->DupObject(pRmApi,
                               session->handle,
                               hParent,
                               &dupedMemHandle,
                               hClient,
                               hPhysMemory,
                               flags);
    if (status != NV_OK)
        goto freeGpaMemdesc;

    *hDupMemory = dupedMemHandle;

freeGpaMemdesc:
    if (pAdjustedMemDesc != pMemDesc)
        fabricvaspacePutGpaMemdesc(pFabricVAS, pAdjustedMemDesc);

done:
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsDupMemory(struct gpuDevice *device,
                            NvHandle hClient,
                            NvHandle hPhysMemory,
                            NvHandle *hDupMemory,
                            gpuMemoryInfo *pGpuMemoryInfo)
{
    return dupMemory(device,
                     hClient,
                     hPhysMemory,
                     NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE,
                     hDupMemory,
                     pGpuMemoryInfo);
}

NV_STATUS nvGpuOpsDupAllocation(struct gpuAddressSpace *srcVaSpace,
                                NvU64 srcAddress,
                                struct gpuAddressSpace *dstVaSpace,
                                NvU64 dstVaAlignment,
                                NvU64 *dstAddress)
{
    NV_STATUS status;
    NvHandle dstPhysHandle = 0;
    NvHandle srcPhysHandle = 0;
    NvU64 tmpDstAddress = 0;
    gpuMemoryInfo gpuMemoryInfo = {0};
    gpuVaAllocInfo allocInfo = {0};
    struct allocFlags flags = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_CHECK_OR_RETURN(LEVEL_ERROR, srcVaSpace != 0, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, dstVaSpace != 0, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, srcVaSpace != dstVaSpace, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, srcAddress != 0, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, dstAddress != NULL, NV_ERR_INVALID_ARGUMENT);

    // If the given combination of source VA space and address does not
    // correspond to a previous allocation, the physical handle retrieval fails
    status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &srcPhysHandle);
    if (status != NV_OK)
        return status;

    // Dupe the physical allocation, and return information about the
    // associated memory descriptor.
    //
    // Passing NV04_DUP_HANDLE_FLAGS_NONE allows duping across MIG partitions.
    status = dupMemory(dstVaSpace->device,
                       srcVaSpace->device->session->handle,
                       srcPhysHandle,
                       NV04_DUP_HANDLE_FLAGS_NONE,
                       &dstPhysHandle,
                       &gpuMemoryInfo);
    if (status != NV_OK)
        return status;

    // Vidmem dups across GPUs are not currently supported
    if (!gpuMemoryInfo.sysmem && (srcVaSpace->device != dstVaSpace->device))
    {
        status = NV_ERR_NOT_SUPPORTED;
        goto cleanup_dup;
    }

    // The virtual allocation and mapping use the size, page size, and
    // alignment of the destination memory descriptor.
    allocInfo.pageSize = gpuMemoryInfo.pageSize;
    allocInfo.alignment = dstVaAlignment;

    status = nvGpuOpsAllocVirtual(dstVaSpace,
                                  gpuMemoryInfo.size,
                                  dstAddress,
                                  dstPhysHandle,
                                  flags,
                                  &allocInfo);
    if (status != NV_OK)
        goto cleanup_dup;

    // Map the entire memory
    status = nvGpuOpsMapGpuMemory(dstVaSpace,
                                  *dstAddress,
                                  gpuMemoryInfo.size,
                                  gpuMemoryInfo.pageSize,
                                  &tmpDstAddress,
                                  flags);
    if (status != NV_OK)
        goto cleanup_virt_allocation;

    NV_ASSERT(tmpDstAddress == *dstAddress);

    return NV_OK;

cleanup_virt_allocation:
    nvGpuOpsFreeVirtual(dstVaSpace, *dstAddress);

cleanup_dup:
    pRmApi->Free(pRmApi, dstVaSpace->device->session->handle, dstPhysHandle);
    return status;
}

NV_STATUS nvGpuOpsGetGuid(NvHandle hClient, NvHandle hDevice,
                          NvHandle hSubDevice, NvU8 *gpuGuid,
                          unsigned guidLength)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV2080_CTRL_GPU_GET_GID_INFO_PARAMS getGidParams = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!gpuGuid)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, hClient, NULL, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    getGidParams.index = 0;
    getGidParams.flags = NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY;
    status = pRmApi->Control(pRmApi,
                             hClient, hSubDevice,
                             NV2080_CTRL_CMD_GPU_GET_GID_INFO,
                             &getGidParams,
                             sizeof(getGidParams));
    if ((guidLength != getGidParams.length) || (status != NV_OK))
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return NV_ERR_INVALID_ARGUMENT;
    }

    portMemCopy(gpuGuid, guidLength, &getGidParams.data, guidLength);

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

// Make sure UVM_COPY_ENGINE_COUNT_MAX is at least the number of copy engines
// supported by RM.
ct_assert(UVM_COPY_ENGINE_COUNT_MAX >= NV2080_ENGINE_TYPE_COPY_SIZE);

static void setCeCaps(const NvU8 *rmCeCaps, gpuCeCaps *ceCaps)
{
    ceCaps->grce = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_GRCE);
    ceCaps->shared = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SHARED);
    ceCaps->sysmemRead = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM_READ);
    ceCaps->sysmemWrite = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM_WRITE);
    ceCaps->nvlinkP2p = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_NVLINK_P2P);
    ceCaps->sysmem = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM);
    ceCaps->p2p = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_P2P);
}

static NV_STATUS queryCopyEngines(struct gpuDevice *gpu, gpuCesCaps *cesCaps)
{
    NV_STATUS status = NV_OK;
    NV2080_CTRL_GPU_GET_ENGINES_PARAMS getEnginesParams = {0};
    NvU32 *engineList;
    NvU32 i;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    status = pRmApi->Control(pRmApi,
                             gpu->session->handle,
                             gpu->subhandle,
                             NV2080_CTRL_CMD_GPU_GET_ENGINES,
                             &getEnginesParams,
                             sizeof(getEnginesParams));
    if (status != NV_OK)
        return status;

    engineList = portMemAllocNonPaged(
        sizeof(*engineList) * getEnginesParams.engineCount);
    if (engineList == NULL)
        return NV_ERR_NO_MEMORY;

    getEnginesParams.engineList = NV_PTR_TO_NvP64(engineList);

    status = pRmApi->Control(pRmApi,
                             gpu->session->handle,
                             gpu->subhandle,
                             NV2080_CTRL_CMD_GPU_GET_ENGINES,
                             &getEnginesParams,
                             sizeof(getEnginesParams));
    if (status != NV_OK)
        goto done;

    portMemSet(cesCaps, 0, sizeof(*cesCaps));

    for (i = 0; i < getEnginesParams.engineCount; i++)
    {
        NV2080_CTRL_CE_GET_CAPS_PARAMS ceParams = {0};
        NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS pceMaskParams = {0};
        NvU8 rmCeCaps[NV2080_CTRL_CE_CAPS_TBL_SIZE] = {0};
        UvmGpuCopyEngineCaps *ceCaps;
        NvU32 ceIndex;

        if (!NV2080_ENGINE_TYPE_IS_COPY(engineList[i]))
            continue;

        ceIndex = NV2080_ENGINE_TYPE_COPY_IDX(engineList[i]);
        if (ceIndex >= NV2080_ENGINE_TYPE_COPY_SIZE)
            continue;

        ceParams.ceEngineType = NV2080_ENGINE_TYPE_COPY(ceIndex);
        ceParams.capsTblSize = NV2080_CTRL_CE_CAPS_TBL_SIZE;
        ceParams.capsTbl = NV_PTR_TO_NvP64(rmCeCaps);

        status = pRmApi->Control(pRmApi,
                                 gpu->session->handle,
                                 gpu->subhandle,
                                 NV2080_CTRL_CMD_CE_GET_CAPS,
                                 &ceParams,
                                 sizeof(ceParams));
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                      __LINE__, nvstatusToString(status));
            goto done;
        }

        ceCaps = cesCaps->copyEngineCaps + ceIndex;
        setCeCaps(rmCeCaps, ceCaps);

        pceMaskParams.ceEngineType = NV2080_ENGINE_TYPE_COPY(ceIndex);
        pceMaskParams.pceMask = 0;
        status = pRmApi->Control(pRmApi,
                                 gpu->session->handle,
                                 gpu->subhandle,
                                 NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK,
                                 &pceMaskParams,
                                 sizeof(pceMaskParams));
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                      __LINE__, nvstatusToString(status));
            goto done;
        }
        ceCaps->cePceMask = pceMaskParams.pceMask;

        ceCaps->supported = NV_TRUE;
    }

done:
    portMemFree(engineList);
    return status;
}
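//
// Usage sketch (illustrative only): pick a usable copy engine from the caps
// filled in by queryCopyEngines(). cesCaps is hypothetical; preferring a
// non-GRCE engine avoids sharing with graphics.
//
//     NvU32 ce;
//     for (ce = 0; ce < UVM_COPY_ENGINE_COUNT_MAX; ce++)
//     {
//         if (cesCaps.copyEngineCaps[ce].supported &&
//             !cesCaps.copyEngineCaps[ce].grce)
//             break; // first dedicated CE wins
//     }
//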
static NvBool isClassHost(NvU32 class)
{
    NvBool bHostClass = NV_FALSE;
    CLI_CHANNEL_CLASS_INFO classInfo;
    CliGetChannelClassInfo(class, &classInfo);
    bHostClass = (classInfo.classType == CHANNEL_CLASS_TYPE_GPFIFO);
    return bHostClass;
}

static NvBool isClassCE(NvU32 class)
{
    switch (class)
    {
        case MAXWELL_DMA_COPY_A:
        case PASCAL_DMA_COPY_A:
        case PASCAL_DMA_COPY_B:
        case VOLTA_DMA_COPY_A:
        case TURING_DMA_COPY_A:
        case AMPERE_DMA_COPY_A:
        case AMPERE_DMA_COPY_B:
        case HOPPER_DMA_COPY_A:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NvBool isClassSec2(NvU32 class)
{
    switch (class)
    {
        case HOPPER_SEC2_WORK_LAUNCH_A:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NvBool isClassCompute(NvU32 class)
{
    switch (class)
    {
        case MAXWELL_COMPUTE_A:
        case MAXWELL_COMPUTE_B:
        case PASCAL_COMPUTE_A:
        case PASCAL_COMPUTE_B:
        case VOLTA_COMPUTE_A:
        case VOLTA_COMPUTE_B:
        case TURING_COMPUTE_A:
        case AMPERE_COMPUTE_A:
        case AMPERE_COMPUTE_B:
        case HOPPER_COMPUTE_A:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NvBool isClassFaultBuffer(NvU32 class)
{
    switch (class)
    {
        case MAXWELL_FAULT_BUFFER_A:
        case MMU_FAULT_BUFFER:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NvBool isClassAccessCounterBuffer(NvU32 class)
{
    switch (class)
    {
        case ACCESS_COUNTER_NOTIFY_BUFFER:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}
static NV_STATUS findDeviceClasses(NvHandle hRoot,
                                   NvHandle hDevice,
                                   NvHandle hSubdevice,
                                   NvU32 *hostClass,
                                   NvU32 *ceClass,
                                   NvU32 *computeClass,
                                   NvU32 *faultBufferClass,
                                   NvU32 *accessCounterBufferClass,
                                   NvU32 *sec2Class)
{
    NvU32 *classList;
    NV_STATUS status = NV_OK;
    NV0080_CTRL_GPU_GET_CLASSLIST_PARAMS classParams = {0};
    NvU32 i = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    *hostClass = 0;
    *ceClass = 0;
    *computeClass = 0;
    *faultBufferClass = 0;
    *accessCounterBufferClass = 0;

    status = pRmApi->Control(pRmApi,
                             hRoot,
                             hDevice,
                             NV0080_CTRL_CMD_GPU_GET_CLASSLIST,
                             &classParams,
                             sizeof(classParams));
    if (status != NV_OK)
        return status;

    classList = portMemAllocNonPaged(
        (sizeof(NvU32) * classParams.numClasses));
    if (classList == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    classParams.classList = NV_PTR_TO_NvP64(classList);
    status = pRmApi->Control(pRmApi,
                             hRoot,
                             hDevice,
                             NV0080_CTRL_CMD_GPU_GET_CLASSLIST,
                             &classParams,
                             sizeof(classParams));
    if (status != NV_OK)
        goto Cleanup_classlist;

    for (i = 0; i < classParams.numClasses; i++)
    {
        if (classList[i] == PHYSICAL_CHANNEL_GPFIFO)
            continue;
        if (isClassHost(classList[i]))
            *hostClass = NV_MAX(*hostClass, classList[i]);
        else if (isClassCE(classList[i]))
            *ceClass = NV_MAX(*ceClass, classList[i]);
        else if (isClassCompute(classList[i]))
            *computeClass = NV_MAX(*computeClass, classList[i]);
        else if (isClassFaultBuffer(classList[i]))
            *faultBufferClass = NV_MAX(*faultBufferClass, classList[i]);
        else if (isClassAccessCounterBuffer(classList[i]))
        {
            NV_ASSERT(accessCounterBufferClass);
            *accessCounterBufferClass = NV_MAX(*accessCounterBufferClass, classList[i]);
        }
        else if (isClassSec2(classList[i]))
            *sec2Class = NV_MAX(*sec2Class, classList[i]);
    }

Cleanup_classlist:
    portMemFree(classList);
    return status;
}

NV_STATUS nvGpuOpsGetClientInfoFromPid(unsigned pid,
                                       const NvU8 *gpuUuid,
                                       NvHandle *hClient,
                                       NvHandle *hDevice,
                                       NvHandle *hSubDevice)
{
    NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
    unsigned gpuId = 0;
    NvHandle hPidClient = 0;
    NvHandle hPidDevice = 0;
    NvHandle hPidVaSpace = 0;
    NvHandle hPidSubDevice = 0;
    NvHandle clientHandle = 0;
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = nvGpuOpsCreateClient(pRmApi, &clientHandle);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    // Find the gpuId from the given UUID
    portMemCopy(&gpuIdInfoParams.gpuUuid, NV_GPU_UUID_LEN, gpuUuid, NV_GPU_UUID_LEN);
    gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
    status = pRmApi->Control(pRmApi,
                             clientHandle,
                             clientHandle,
                             NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
                             &gpuIdInfoParams,
                             sizeof(gpuIdInfoParams));
    if (status != NV_OK)
        goto cleanup;

    gpuId = gpuIdInfoParams.gpuId;

    status = findVaspaceFromPid(pid, gpuId, &hPidClient,
                                &hPidDevice, &hPidSubDevice, &hPidVaSpace);

    // Free the session we just created
    pRmApi->Free(pRmApi, clientHandle, clientHandle);
    if (status != NV_OK)
        goto cleanup;

    *hClient = hPidClient;
    *hDevice = hPidDevice;
    *hSubDevice = hPidSubDevice;
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return NV_OK;

cleanup:
    *hClient = 0;
    *hDevice = 0;
    *hSubDevice = 0;
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
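//
// Usage sketch (illustrative only): recover the RM handles that another
// process holds for a GPU, as matched by findVaspaceFromPid(). pid and uuid
// are hypothetical inputs.
//
//     NvHandle hClient, hDevice, hSubDevice;
//     if (nvGpuOpsGetClientInfoFromPid(pid, uuid, &hClient,
//                                      &hDevice, &hSubDevice) == NV_OK)
//     {
//         // handles belong to the target process's RM client
//     }
//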
NV_STATUS nvGpuOpsSetPageDirectory(struct gpuAddressSpace *vaSpace,
                                   NvU64 physAddress,
                                   unsigned numEntries,
                                   NvBool bVidMemAperture, NvU32 pasid)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS params = {0};
    OBJGPU *pGpu = NULL;
    OBJVASPACE *pVAS = NULL;
    RsClient *pClient;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!vaSpace || !numEntries)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      vaSpace->device->session->handle,
                                      &pClient,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = CliSetGpuContext(vaSpace->device->session->handle,
                              vaSpace->device->handle,
                              &pGpu,
                              NULL);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // Make sure there is no PDB set if the VA space is already externally owned
        if (NULL != vaspaceGetPageDirBase(pVAS, pGpu))
        {
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return NV_ERR_NOT_SUPPORTED;
        }

        // Stop all channels under the VAS
        status = nvGpuOpsDisableVaSpaceChannels(vaSpace);
        if (status != NV_OK)
        {
            //
            // If stopping any channels failed, reenable the channels which
            // were able to be stopped before bailing
            //
            nvGpuOpsEnableVaSpaceChannels(vaSpace);
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return status;
        }
    }

    params.physAddress = physAddress;
    params.numEntries = numEntries;
    params.hVASpace = vaSpace->handle;
    params.flags = bVidMemAperture ?
        DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS, _APERTURE, _VIDMEM) :
        DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS, _APERTURE, _SYSMEM_COH);
    params.flags |= DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS,
                            _ALL_CHANNELS, _TRUE);
    params.pasid = pasid;

    // Always do Unicast by passing a non-zero subDeviceId!
    params.subDeviceId = vaSpace->device->subdeviceInstance + 1;

    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->handle,
                             NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY,
                             &params,
                             sizeof(params));

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // Reschedule all channels in this VAS
        nvGpuOpsEnableVaSpaceChannels(vaSpace);
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsUnsetPageDirectory(struct gpuAddressSpace *vaSpace)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV0080_CTRL_DMA_UNSET_PAGE_DIRECTORY_PARAMS params = {0};
    OBJGPU *pGpu = NULL;
    OBJVASPACE *pVAS = NULL;
    RsClient *pClient;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!vaSpace)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      vaSpace->device->session->handle,
                                      &pClient,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = CliSetGpuContext(vaSpace->device->session->handle,
                              vaSpace->device->handle,
                              &pGpu,
                              NULL);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // Stop all channels under the VAS
        status = nvGpuOpsDisableVaSpaceChannels(vaSpace);
        if (status != NV_OK)
        {
            //
            // If stopping any channels failed, reenable the channels which
            // were able to be stopped before bailing
            //
            nvGpuOpsEnableVaSpaceChannels(vaSpace);
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return status;
        }
    }

    params.hVASpace = vaSpace->handle;

    // Always do Unicast by passing a non-zero subDeviceId!
    params.subDeviceId = vaSpace->device->subdeviceInstance + 1;

    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->handle,
                             NV0080_CTRL_CMD_DMA_UNSET_PAGE_DIRECTORY,
                             &params,
                             sizeof(params));

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // Reschedule all channels in this VAS
        nvGpuOpsEnableVaSpaceChannels(vaSpace);
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsGetGmmuFmt(struct gpuAddressSpace *vaSpace, void **pFmt)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV90F1_CTRL_VASPACE_GET_GMMU_FORMAT_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!vaSpace || !pFmt)
        return NV_ERR_INVALID_ARGUMENT;

    if (!vaSpace->handle)
        return NV_ERR_INVALID_OBJECT_HANDLE;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      vaSpace->device->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    *pFmt = NULL;
    params.hSubDevice = vaSpace->device->subhandle;

    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->handle,
                             NV90F1_CTRL_CMD_VASPACE_GET_GMMU_FORMAT,
                             &params,
                             sizeof(params));
    if (status == NV_OK)
        *pFmt = (void *)params.pFmt;

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsInvalidateTlb(struct gpuAddressSpace *vaSpace)
{
    NV2080_CTRL_DMA_INVALIDATE_TLB_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!vaSpace)
        return NV_ERR_INVALID_ARGUMENT;

    params.hVASpace = vaSpace->handle;
    return pRmApi->Control(pRmApi,
                           vaSpace->device->session->handle,
                           vaSpace->device->subhandle,
                           NV2080_CTRL_CMD_DMA_INVALIDATE_TLB,
                           &params,
                           sizeof(params));
}

NV_STATUS nvGpuOpsGetFbInfo(struct gpuDevice *device, gpuFbInfo *fbInfo)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;

    if (!device || !fbInfo)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      device->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    portMemCopy(fbInfo, sizeof(*fbInfo), &device->fbInfo, sizeof(*fbInfo));

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return NV_OK;
}

NV_STATUS nvGpuOpsGetEccInfo(struct gpuDevice *device, gpuEccInfo *eccInfo)
{
    subDeviceDesc *rmSubDevice;

    if (!device || !eccInfo)
        return NV_ERR_INVALID_ARGUMENT;

    rmSubDevice = device->rmSubDevice;

    if (!rmSubDevice->bEccInitialized)
        return NV_ERR_NOT_SUPPORTED;

    eccInfo->eccMask = rmSubDevice->eccMask;
    eccInfo->eccOffset = rmSubDevice->eccOffset;
    eccInfo->eccReadLocation = rmSubDevice->eccReadLocation;
    eccInfo->bEccEnabled = rmSubDevice->bEccEnabled;
    eccInfo->eccErrorNotifier = &rmSubDevice->eccErrorNotifier;

    return NV_OK;
}

//
// Do not acquire the GPU locks: all nvGpuOpsFreeDupedHandle() does is call
// pRmApi->Free(), which drops the GPU locks if acquired (and re-acquires
// them later).
//
NV_STATUS nvGpuOpsFreeDupedHandle(struct gpuDevice *device,
                                  NvHandle hPhysHandle)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    Memory *pMemory = NULL;
    OBJGPU *pMappingGpu = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RsClient *pClient;
    Subdevice *pSubdevice;
    NvHandle hClient;

    if (!device)
        return NV_ERR_INVALID_ARGUMENT;

    hClient = device->session->handle;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_READ, hClient, &pClient, 0, 0, 0, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = subdeviceGetByHandle(pClient, device->subhandle, &pSubdevice);
    if (status != NV_OK)
        goto out;

    pMappingGpu = GPU_RES_GET_GPU(pSubdevice);

    GPU_RES_SET_THREAD_BC_STATE(pSubdevice);

    status = nvGpuOpsGetMemoryByHandle(device->session->handle,
                                       hPhysHandle,
                                       &pMemory);
    if (status != NV_OK)
        goto out;

    if (memdescIsSysmem(pMemory->pMemDesc))
    {
        // Release the mappings acquired in nvGpuOpsDupMemory().
        //
        // TODO: Bug 1811060: Add native support for this use-case in RM API.
        memdescUnmapIommu(pMemory->pMemDesc, pMappingGpu->busInfo.iovaspaceId);
    }

out:
    pRmApi->Free(pRmApi, device->session->handle, hPhysHandle);
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
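//
// Usage sketch (illustrative only): a handle produced by nvGpuOpsDupMemory()
// is released with nvGpuOpsFreeDupedHandle(), which also drops the IOMMU
// mappings added at dup time for sysmem. device, hClient and hPhys are
// hypothetical.
//
//     NvHandle hDup = 0;
//     if (nvGpuOpsDupMemory(device, hClient, hPhys, &hDup, NULL) == NV_OK)
//     {
//         // ... use hDup ...
//         nvGpuOpsFreeDupedHandle(device, hDup);
//     }
//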
NV_STATUS nvGpuOpsInitFaultInfo(struct gpuDevice *device,
                                gpuFaultInfo *pFaultInfo)
{
    struct gpuSession *session = device->session;
    NV_STATUS status = NV_OK;
    NVB069_ALLOCATION_PARAMETERS faultBufferAllocParams = {0};
    NVB069_CTRL_FAULTBUFFER_GET_SIZE_PARAMS sizeParams = {0};
    NVB069_CTRL_CMD_FAULTBUFFER_GET_REGISTER_MAPPINGS_PARAMS registermappingsParams = {0};
    void *bufferAddress = NULL;
    NvU32 faultBufferSize = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    RsClient *pClient;
    Device *pDevice;
    NvHandle hClient = device->session->handle;
    NvHandle hDevice = device->handle;
    UvmFaultMetadataPacket *bufferMetadata = NULL;

    status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
    if (status != NV_OK)
        return status;

    status = deviceGetByHandle(pClient, hDevice, &pDevice);
    if (status != NV_OK)
        return status;

    pFaultInfo->pDevice = pDevice;

    pFaultInfo->faultBufferHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->subhandle,
                           &pFaultInfo->faultBufferHandle,
                           device->faultBufferClass,
                           &faultBufferAllocParams,
                           sizeof(faultBufferAllocParams));
    if (status != NV_OK)
        goto cleanup;

    OBJGPU *pGpu;

    pGpu = GPU_RES_GET_GPU(pDevice);

    //
    // When Hopper CC is enabled, UVM does not have direct access to the
    // replayable HW fault buffer. Instead, it uses a shadow fault buffer in
    // unprotected sysmem; GSP-RM copies encrypted fault packets from the HW
    // fault buffer into this shadow buffer.
    //
    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        // Get the size of the fault buffer
        status = pRmApi->Control(pRmApi,
                                 session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVB069_CTRL_CMD_FAULTBUFFER_GET_SIZE,
                                 &sizeParams,
                                 sizeof(sizeParams));
        if (status != NV_OK)
            goto cleanup_fault_buffer;

        faultBufferSize = sizeParams.faultBufferSize;

        // Map the fault buffer to the CPU, using the size queried above
        status = pRmApi->MapToCpu(pRmApi,
                                  session->handle,
                                  device->subhandle,
                                  pFaultInfo->faultBufferHandle,
                                  0,
                                  faultBufferSize,
                                  &bufferAddress,
                                  0);
        if (status != NV_OK)
            goto cleanup_fault_buffer;
    }

    if (isDeviceVoltaPlus(device))
    {
        NVC369_CTRL_MMU_FAULT_BUFFER_REGISTER_NON_REPLAY_BUF_PARAMS nonReplayableFaultsParams = {0};

        status = pRmApi->Control(pRmApi,
                                 session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVC369_CTRL_CMD_MMU_FAULT_BUFFER_REGISTER_NON_REPLAY_BUF,
                                 &nonReplayableFaultsParams,
                                 sizeof(nonReplayableFaultsParams));
        if (status != NV_OK)
            goto cleanup_fault_buffer;

        pFaultInfo->nonReplayable.shadowBufferAddress = (void *)NvP64_VALUE(nonReplayableFaultsParams.pShadowBuffer);
        pFaultInfo->nonReplayable.shadowBufferContext = (void *)NvP64_VALUE(nonReplayableFaultsParams.pShadowBufferContext);
        pFaultInfo->nonReplayable.bufferSize = nonReplayableFaultsParams.bufferSize;
        pFaultInfo->nonReplayable.shadowBufferMetadata = (UvmFaultMetadataPacket *)NvP64_VALUE(nonReplayableFaultsParams.pShadowBufferMetadata);
    }

    if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        NVC369_CTRL_MMU_FAULT_BUFFER_REGISTER_REPLAY_BUF_PARAMS replayableFaultsParams = {0};

        // Register a shadow buffer for replayable faults when Hopper CC is enabled
        status = pRmApi->Control(pRmApi,
                                 session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVC369_CTRL_CMD_MMU_FAULT_BUFFER_REGISTER_REPLAY_BUF,
                                 &replayableFaultsParams,
                                 sizeof(replayableFaultsParams));
        if (status != NV_OK)
            goto cleanup_fault_buffer;

        bufferAddress = (void *)NvP64_VALUE(replayableFaultsParams.pShadowBuffer);
        faultBufferSize = replayableFaultsParams.bufferSize;

        // Make sure that the UVM and RM sizes of the metadata packet are equal.
        ct_assert(sizeof(GMMU_FAULT_PACKET_METADATA) == sizeof(UvmFaultMetadataPacket));
        bufferMetadata = (UvmFaultMetadataPacket *)NvP64_VALUE(replayableFaultsParams.pShadowBufferMetadata);

        // Get the register mappings for the non-replayable fault buffer
        portMemSet(&registermappingsParams, 0, sizeof(registermappingsParams));

        registermappingsParams.faultBufferType = NVB069_CTRL_FAULT_BUFFER_NON_REPLAYABLE;
        status = pRmApi->Control(pRmApi,
                                 session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVB069_CTRL_CMD_FAULTBUFFER_GET_REGISTER_MAPPINGS,
                                 &registermappingsParams,
                                 sizeof(registermappingsParams));
        if (status != NV_OK)
            goto cleanup_fault_buffer;

        pFaultInfo->nonReplayable.pFaultBufferPut = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferPut;
    }

    registermappingsParams.faultBufferType = NVB069_CTRL_FAULT_BUFFER_REPLAYABLE;
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pFaultInfo->faultBufferHandle,
                             NVB069_CTRL_CMD_FAULTBUFFER_GET_REGISTER_MAPPINGS,
                             &registermappingsParams,
                             sizeof(registermappingsParams));
    if (status != NV_OK)
        goto cleanup_fault_buffer;

    pFaultInfo->replayable.pFaultBufferGet = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferGet;
    pFaultInfo->replayable.pFaultBufferPut = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferPut;
    pFaultInfo->replayable.pFaultBufferInfo = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferInfo;
    pFaultInfo->replayable.pPmcIntr = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntr;
    pFaultInfo->replayable.pPmcIntrEnSet = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntrEnSet;
    pFaultInfo->replayable.pPmcIntrEnClear = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntrEnClear;
    pFaultInfo->replayable.replayableFaultMask = registermappingsParams.replayableFaultMask;
    pFaultInfo->replayable.pPrefetchCtrl = (NvU32*)(NvUPtr)registermappingsParams.pPrefetchCtrl;
    pFaultInfo->replayable.bufferSize = faultBufferSize;
    pFaultInfo->replayable.bufferAddress = bufferAddress;
    pFaultInfo->replayable.bufferMetadata = bufferMetadata;

    if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);

        pFaultInfo->replayable.bUvmOwnsHwFaultBuffer = NV_FALSE;
        pFaultInfo->replayable.cslCtx.ctx = (struct ccslContext_t *) kgmmuGetShadowFaultBufferCslContext(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
        if (pFaultInfo->replayable.cslCtx.ctx == NULL)
        {
            NV_PRINTF(LEVEL_ERROR, "Replayable buffer CSL context not allocated\n");
            goto cleanup_fault_buffer;
        }
    }
    else
    {
        pFaultInfo->replayable.bUvmOwnsHwFaultBuffer = NV_TRUE;
    }

    return NV_OK;

cleanup_fault_buffer:
    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        gpuDeviceUnmapCpuFreeHandle(device,
                                    pFaultInfo->faultBufferHandle,
                                    bufferAddress,
                                    0);
    }
cleanup:
    portMemSet(pFaultInfo, 0, sizeof(*pFaultInfo));
    return status;
}
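//
// Illustrative model of the CC shadow-buffer layout set up above (a sketch,
// not authoritative): each slot in a shadow buffer holds one encrypted
// GMMU_FAULT_PACKET, with a parallel metadata array carrying the valid flag
// and authentication tag for that slot:
//
//     shadow buffer:   [pkt 0][pkt 1]...[pkt N-1]    (NVC369_BUF_SIZE each)
//     metadata array:  [meta 0][meta 1]...[meta N-1]
//
// GSP-RM advances PUT as it deposits encrypted packets; the non-replayable
// consumer (nvGpuOpsGetNonReplayableFaults() below) walks GET toward PUT,
// decrypting one packet per slot via the CSL context cached in pFaultInfo.
//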
NV_STATUS nvGpuOpsInitAccessCntrInfo(struct gpuDevice *device,
                                     gpuAccessCntrInfo *pAccessCntrInfo,
                                     NvU32 accessCntrIndex)
{
    struct gpuSession *session = device->session;
    NV_STATUS status = NV_OK;
    NV_ACCESS_COUNTER_NOTIFY_BUFFER_ALLOC_PARAMS accessCntrBufferAllocParams = {0};
    NVC365_CTRL_ACCESS_CNTR_BUFFER_GET_SIZE_PARAMS sizeParams = {0};
    NVC365_CTRL_ACCESS_CNTR_BUFFER_GET_REGISTER_MAPPINGS_PARAMS registermappings;
    void *bufferAddress = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    OBJGPU *pGpu = NULL;

    // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    accessCntrBufferAllocParams.accessCounterIndex = accessCntrIndex;
    pAccessCntrInfo->accessCntrBufferHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->subhandle,
                           &pAccessCntrInfo->accessCntrBufferHandle,
                           device->accessCounterBufferClass,
                           &accessCntrBufferAllocParams,
                           sizeof(accessCntrBufferAllocParams));
    if (status != NV_OK)
        goto cleanup;

    // Query the buffer size before mapping so the CPU mapping covers the
    // entire notification buffer.
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pAccessCntrInfo->accessCntrBufferHandle,
                             NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_GET_SIZE,
                             &sizeParams,
                             sizeof(sizeParams));
    if (status != NV_OK)
        goto cleanup_access_ctr_buffer;

    pAccessCntrInfo->bufferSize = sizeParams.accessCntrBufferSize;

    status = pRmApi->MapToCpu(pRmApi, session->handle, device->subhandle, pAccessCntrInfo->accessCntrBufferHandle,
                              0, pAccessCntrInfo->bufferSize, &bufferAddress, 0);
    if (status != NV_OK)
        goto cleanup_access_ctr_buffer;

    pAccessCntrInfo->bufferAddress = bufferAddress;

    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pAccessCntrInfo->accessCntrBufferHandle,
                             NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_GET_REGISTER_MAPPINGS,
                             &registermappings,
                             sizeof(registermappings));
    if (status != NV_OK)
        goto cleanup_access_ctr_buffer;

    pAccessCntrInfo->pAccessCntrBufferGet = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferGet;
    pAccessCntrInfo->pAccessCntrBufferPut = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferPut;
    pAccessCntrInfo->pAccessCntrBufferFull = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferFull;
    pAccessCntrInfo->pHubIntr = (NvU32*)(NvUPtr)registermappings.pHubIntr;
    pAccessCntrInfo->pHubIntrEnSet = (NvU32*)(NvUPtr)registermappings.pHubIntrEnSet;
    pAccessCntrInfo->pHubIntrEnClear = (NvU32*)(NvUPtr)registermappings.pHubIntrEnClear;
    pAccessCntrInfo->accessCounterMask = registermappings.accessCntrMask;

    return NV_OK;

cleanup_access_ctr_buffer:
    gpuDeviceUnmapCpuFreeHandle(device,
                                pAccessCntrInfo->accessCntrBufferHandle,
                                pAccessCntrInfo->bufferAddress,
                                0);
cleanup:
    pAccessCntrInfo->accessCntrBufferHandle = 0;
    pAccessCntrInfo->bufferAddress = 0;
    return status;
}
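//
// Typical enable flow for the access counter buffer initialized above
// (sketch only; the granularity/limit/threshold values are illustrative):
//
//     gpuAccessCntrConfig config = {0};
//     config.mimcGranularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
//     config.momcGranularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
//     config.mimcUseLimit    = UVM_ACCESS_COUNTER_USE_LIMIT_FULL;
//     config.momcUseLimit    = UVM_ACCESS_COUNTER_USE_LIMIT_FULL;
//     config.threshold       = 256;
//     status = nvGpuOpsEnableAccessCntr(device, pAccessCntrInfo, &config);
//
// The helpers below translate the UVM_* enums into the NVC365_* control
// values consumed by NVC365_CTRL_CMD_ACCESS_CNTR_SET_CONFIG.
//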
static NV_STATUS
getAccessCounterGranularityValue(UVM_ACCESS_COUNTER_GRANULARITY granularity, NvU32 *value)
{
    *value = 0;

    switch (granularity)
    {
        case UVM_ACCESS_COUNTER_GRANULARITY_64K:
            *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_64K;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_2M:
            *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_2M;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_16M:
            *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_16M;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_16G:
            *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_16G;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    return NV_OK;
}

static NV_STATUS
getAccessCounterLimitValue(UVM_ACCESS_COUNTER_USE_LIMIT limit, NvU32 *value)
{
    *value = 0;

    switch (limit)
    {
        case UVM_ACCESS_COUNTER_USE_LIMIT_NONE:
            *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_NONE;
            break;
        case UVM_ACCESS_COUNTER_USE_LIMIT_QTR:
            *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_QTR;
            break;
        case UVM_ACCESS_COUNTER_USE_LIMIT_HALF:
            *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_HALF;
            break;
        case UVM_ACCESS_COUNTER_USE_LIMIT_FULL:
            *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_FULL;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    return NV_OK;
}

NV_STATUS nvGpuOpsEnableAccessCntr(struct gpuDevice *device,
                                   gpuAccessCntrInfo *pAccessCntrInfo,
                                   gpuAccessCntrConfig *pAccessCntrConfig)
{
    NV_STATUS status = NV_OK;
    NVC365_CTRL_ACCESS_CNTR_SET_CONFIG_PARAMS setConfigParams = { 0 };
    NVC365_CTRL_ACCESS_CNTR_BUFFER_ENABLE_PARAMS enableParams = { 0 };
    struct gpuSession *session = device->session;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    status = getAccessCounterGranularityValue(pAccessCntrConfig->mimcGranularity, &setConfigParams.mimcGranularity);
    if (status != NV_OK)
        return status;

    status = getAccessCounterGranularityValue(pAccessCntrConfig->momcGranularity, &setConfigParams.momcGranularity);
    if (status != NV_OK)
        return status;

    status = getAccessCounterLimitValue(pAccessCntrConfig->mimcUseLimit, &setConfigParams.mimcLimit);
    if (status != NV_OK)
        return status;

    status = getAccessCounterLimitValue(pAccessCntrConfig->momcUseLimit, &setConfigParams.momcLimit);
    if (status != NV_OK)
        return status;

    setConfigParams.threshold = pAccessCntrConfig->threshold;
    setConfigParams.cmd = NVC365_CTRL_ACCESS_COUNTER_SET_MIMC_GRANULARITY |
                          NVC365_CTRL_ACCESS_COUNTER_SET_MOMC_GRANULARITY |
                          NVC365_CTRL_ACCESS_COUNTER_SET_MIMC_LIMIT |
                          NVC365_CTRL_ACCESS_COUNTER_SET_MOMC_LIMIT |
                          NVC365_CTRL_ACCESS_COUNTER_SET_THRESHOLD;

    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pAccessCntrInfo->accessCntrBufferHandle,
                             NVC365_CTRL_CMD_ACCESS_CNTR_SET_CONFIG,
                             &setConfigParams,
                             sizeof(setConfigParams));
    if (status != NV_OK)
        return status;

    enableParams.intrOwnership = NVC365_CTRL_ACCESS_COUNTER_INTERRUPT_OWNERSHIP_NOT_RM;
    enableParams.enable = NV_TRUE;

    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pAccessCntrInfo->accessCntrBufferHandle,
                             NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_ENABLE,
                             &enableParams,
                             sizeof(enableParams));
    return status;
}

NV_STATUS nvGpuOpsDisableAccessCntr(struct gpuDevice *device,
                                    gpuAccessCntrInfo *pAccessCntrInfo)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NVC365_CTRL_ACCESS_CNTR_BUFFER_ENABLE_PARAMS enableParams = { 0 };

    enableParams.intrOwnership = NVC365_CTRL_ACCESS_COUNTER_INTERRUPT_OWNERSHIP_RM;
    enableParams.enable = NV_FALSE;
    return pRmApi->Control(pRmApi,
                           device->session->handle,
                           pAccessCntrInfo->accessCntrBufferHandle,
                           NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_ENABLE,
                           &enableParams,
                           sizeof(enableParams));
}

NV_STATUS nvGpuOpsDestroyAccessCntrInfo(struct gpuDevice *device,
                                        gpuAccessCntrInfo *pAccessCntrInfo)
{
    gpuDeviceUnmapCpuFreeHandle(device,
                                pAccessCntrInfo->accessCntrBufferHandle,
                                pAccessCntrInfo->bufferAddress,
                                0);
    portMemSet(pAccessCntrInfo, 0, sizeof(gpuAccessCntrInfo));
    return NV_OK;
}
NV_STATUS nvGpuOpsDestroyFaultInfo(struct gpuDevice *device,
                                   gpuFaultInfo *pFaultInfo)
{
    NV_STATUS status = NV_OK;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    OBJGPU *pGpu;

    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    NV_ASSERT(status == NV_OK);
    status = CliSetGpuContext(device->session->handle, device->handle, &pGpu, NULL);
    NV_ASSERT(status == NV_OK);
    rmapiLockRelease();

    if (pFaultInfo->faultBufferHandle && isDeviceVoltaPlus(device))
    {
        NVC369_CTRL_MMU_FAULT_BUFFER_UNREGISTER_NON_REPLAY_BUF_PARAMS params = {0};

        params.pShadowBuffer = NV_PTR_TO_NvP64(pFaultInfo->nonReplayable.shadowBufferAddress);

        status = pRmApi->Control(pRmApi,
                                 device->session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVC369_CTRL_CMD_MMU_FAULT_BUFFER_UNREGISTER_NON_REPLAY_BUF,
                                 &params,
                                 sizeof(params));
        NV_ASSERT(status == NV_OK);
    }

    if (pFaultInfo->faultBufferHandle && gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        NVC369_CTRL_MMU_FAULT_BUFFER_UNREGISTER_REPLAY_BUF_PARAMS params = {0};

        params.pShadowBuffer = NV_PTR_TO_NvP64(pFaultInfo->replayable.bufferAddress);

        status = pRmApi->Control(pRmApi,
                                 device->session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVC369_CTRL_CMD_MMU_FAULT_BUFFER_UNREGISTER_REPLAY_BUF,
                                 &params,
                                 sizeof(params));
        NV_ASSERT(status == NV_OK);
    }
    else
    {
        gpuDeviceUnmapCpuFreeHandle(device,
                                    pFaultInfo->faultBufferHandle,
                                    pFaultInfo->replayable.bufferAddress,
                                    0);
    }

    portMemSet(pFaultInfo, 0, sizeof(gpuFaultInfo));
    return status;
}
NV_STATUS nvGpuOpsHasPendingNonReplayableFaults(gpuFaultInfo *pFaultInfo,
                                                NvBool *hasPendingFaults)
{
    NV_STATUS status = NV_OK;

    if (hasPendingFaults == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    if (pFaultInfo->pDevice == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    OBJGPU *pGpu;

    pGpu = GPU_RES_GET_GPU(pFaultInfo->pDevice);

    //
    // With GSP-owned HW fault buffers, CPU-RM maintains the GET pointer,
    // while the PUT pointer is updated by GSP through a PRI register that
    // CPU-RM only reads. GET != PUT implies pending faults in the shadow
    // buffer.
    //
    if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);

        *hasPendingFaults = (pFaultInfo->nonReplayable.shadowBufferGet !=
                             kgmmuReadShadowBufPutIndex_HAL(pGpu,
                                                            pKernelGmmu,
                                                            NON_REPLAYABLE_FAULT_BUFFER));
    }
    else
    {
        GMMU_SHADOW_FAULT_BUF *pQueue =
            (GMMU_SHADOW_FAULT_BUF *) pFaultInfo->nonReplayable.shadowBufferAddress;

        if (pQueue == NULL)
            return NV_ERR_INVALID_ARGUMENT;

        *hasPendingFaults = !queueIsEmpty(pQueue);
    }

    return status;
}
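//
// Consumption contract for nvGpuOpsGetNonReplayableFaults() below (a
// descriptive note, not new behavior): the caller is expected to supply a
// buffer large enough for the pending packets (up to
// nonReplayable.bufferSize bytes); faults are copied out in NVC369_BUF_SIZE
// strides and *numFaults reports how many packets were written. In the
// Hopper CC path, each packet is decrypted with ccslDecrypt() using the
// metadata slot's authTag, with the valid flag passed as additional
// authenticated data.
//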
NV_STATUS nvGpuOpsGetNonReplayableFaults(gpuFaultInfo *pFaultInfo,
                                         void *faultBuffer,
                                         NvU32 *numFaults)
{
    NV_STATUS status = NV_OK;

    if (faultBuffer == NULL || numFaults == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    if (pFaultInfo->pDevice == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    *numFaults = 0;

    OBJGPU *pGpu;

    pGpu = GPU_RES_GET_GPU(pFaultInfo->pDevice);

    if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
        NvU32 shadowBufferPutIndex;
        NvU32 shadowBufferGetIndex;
        NvU32 maxFaultBufferEntries;
        struct ccslContext_t *cslCtx;

        cslCtx = (struct ccslContext_t *) kgmmuGetShadowFaultBufferCslContext(pGpu, pKernelGmmu, NON_REPLAYABLE_FAULT_BUFFER);
        if (cslCtx == NULL)
        {
            NV_PRINTF(LEVEL_ERROR, "Non-replayable buffer CSL context not allocated\n");
            return NV_ERR_INVALID_STATE;
        }

        maxFaultBufferEntries = pFaultInfo->nonReplayable.bufferSize / NVC369_BUF_SIZE;
        shadowBufferGetIndex = pFaultInfo->nonReplayable.shadowBufferGet;
        shadowBufferPutIndex = kgmmuReadShadowBufPutIndex_HAL(pGpu,
                                                              pKernelGmmu,
                                                              NON_REPLAYABLE_FAULT_BUFFER);
        // Copy the fault packets as long as GET != PUT
        while (shadowBufferGetIndex != shadowBufferPutIndex)
        {
            UvmFaultMetadataPacket metadata;
            NvU8 *pShadowBuffer = (NvU8 *)pFaultInfo->nonReplayable.shadowBufferAddress;
            UvmFaultMetadataPacket *pShadowBufferMetadata = pFaultInfo->nonReplayable.shadowBufferMetadata;

            ++(*numFaults);

            portMemCopy(&metadata, sizeof(UvmFaultMetadataPacket),
                        pShadowBufferMetadata + shadowBufferGetIndex,
                        sizeof(UvmFaultMetadataPacket));

            // Sanity-check that the valid bit is set, even though
            // non-replayable handling relies on the PRI values.
            if (metadata.valid != GMMU_FAULT_PACKET_METADATA_VALID_YES)
            {
                return NV_ERR_INVALID_STATE;
            }

            //
            // A read memory barrier here ensures that the valid bit check is
            // performed before a decryption is attempted. This is needed for
            // architectures like PowerPC and ARM where read instructions can
            // be reordered.
            //
            portAtomicMemoryFenceLoad();

            status = ccslDecrypt(cslCtx,
                                 sizeof(GMMU_FAULT_PACKET),
                                 pShadowBuffer + (shadowBufferGetIndex * NVC369_BUF_SIZE),
                                 NULL,
                                 &metadata.valid,
                                 sizeof(metadata.valid),
                                 faultBuffer,
                                 metadata.authTag);
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "Fault buffer packet decryption failed with status = 0x%x\n", status);
                return status;
            }

            // Clear the plaintext valid bit and authTag.
            portMemSet(pShadowBufferMetadata + shadowBufferGetIndex,
                       0x0,
                       sizeof(UvmFaultMetadataPacket));

            shadowBufferGetIndex = (shadowBufferGetIndex + 1) % maxFaultBufferEntries;
            faultBuffer = (NvU8 *)faultBuffer + NVC369_BUF_SIZE;
        }
        // Update the GET pointer
        pFaultInfo->nonReplayable.shadowBufferGet = shadowBufferGetIndex;
    }
    else
    {
        GMMU_SHADOW_FAULT_BUF *pQueue =
            (GMMU_SHADOW_FAULT_BUF *) pFaultInfo->nonReplayable.shadowBufferAddress;
        QueueContext *pQueueCtx =
            (QueueContext *) pFaultInfo->nonReplayable.shadowBufferContext;

        if (pQueue == NULL)
            return NV_ERR_INVALID_ARGUMENT;

        // Copy all faults in the client shadow fault buffer to the given buffer
        while (queuePopAndCopyNonManaged(pQueue, pQueueCtx, faultBuffer))
        {
            ++(*numFaults);
            faultBuffer = (char *)faultBuffer + NVC369_BUF_SIZE;
        }
    }

    return status;
}

NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device)
{
    NV_STATUS status;
    NvHandle hClient = device->session->handle;
    RsClient *pClient;
    Device *pDevice;
    OBJGPU *pGpu;
    KernelGmmu *pKernelGmmu;

    status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
    if (status != NV_OK)
        return NV_ERR_INVALID_ARGUMENT;

    status = deviceGetByHandle(pClient, device->handle, &pDevice);
    if (status != NV_OK)
        return NV_ERR_INVALID_ARGUMENT;

    GPU_RES_SET_THREAD_BC_STATE(pDevice);

    pGpu = GPU_RES_GET_GPU(pDevice);
    pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);

    return kgmmuIssueReplayableFaultBufferFlush_HAL(pGpu, pKernelGmmu);
}
static NV_STATUS nvGpuOpsVerifyChannel(struct gpuAddressSpace *vaSpace,
                                       RsClient *pClient,
                                       NvHandle hKernelChannel,
                                       OBJGPU **pGpu,
                                       KernelChannel **ppKernelChannel)
{
    NV_STATUS status = NV_OK;
    NvHandle hDevice;
    OBJVASPACE *pVAS = NULL;
    OBJGPU *pVaSpaceGpu;
    RsClient *pSessionClient;
    Subdevice *pSubdevice;

    NV_ASSERT_OR_RETURN(ppKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle,
                                      &pSessionClient);
    if (status != NV_OK)
        return status;

    status = vaspaceGetByHandleOrDeviceDefault(pSessionClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if (status != NV_OK)
        return status;

    status = CliGetKernelChannel(pClient, hKernelChannel, ppKernelChannel);
    if (status != NV_OK)
        return NV_ERR_INVALID_OBJECT_HANDLE;

    hDevice = RES_GET_HANDLE(GPU_RES_GET_DEVICE(*ppKernelChannel));
    status = CliSetGpuContext(pClient->hClient, hDevice, pGpu, NULL);
    if (status != NV_OK)
        return status;

    if ((*ppKernelChannel)->pVAS != pVAS)
    {
        if (CliSetGpuContext(vaSpace->device->session->handle,
                             vaSpace->device->handle,
                             &pVaSpaceGpu,
                             NULL) == NV_OK && pVaSpaceGpu != *pGpu)
            return NV_ERR_OTHER_DEVICE_FOUND;

        return NV_ERR_INVALID_CHANNEL;
    }

    //
    // In SLI configs, RM's internal allocations such as the channel instance
    // are tracked with a memdesc per subdevice. Hence, get the correct pGpu.
    //
    status = subdeviceGetByHandle(pSessionClient, vaSpace->device->subhandle,
                                  &pSubdevice);
    if (status != NV_OK)
        return status;

    *pGpu = GPU_RES_GET_GPU(pSubdevice);

    GPU_RES_SET_THREAD_BC_STATE(pSubdevice);

    return NV_OK;
}

static NV_STATUS nvGpuOpsGetChannelEngineType(OBJGPU *pGpu,
                                              KernelChannel *pKernelChannel,
                                              UVM_GPU_CHANNEL_ENGINE_TYPE *engineType)
{
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    NvU32 engDesc;
    RM_ENGINE_TYPE rmEngineType;
    NV_STATUS status;

    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    status = kchannelGetEngine_HAL(pGpu, pKernelChannel, &engDesc);
    if (status != NV_OK)
        return status;

    status = kfifoEngineInfoXlate_HAL(pGpu,
                                      pKernelFifo,
                                      ENGINE_INFO_TYPE_ENG_DESC,
                                      engDesc,
                                      ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
                                      (NvU32 *)&rmEngineType);
    if (status != NV_OK)
        return status;

    if (RM_ENGINE_TYPE_IS_GR(rmEngineType))
        *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_GR;
    else if (rmEngineType == RM_ENGINE_TYPE_SEC2)
        *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2;
    else
        *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;

    return NV_OK;
}

static void _memdescRetain(MEMORY_DESCRIPTOR *pMemDesc)
{
    if (pMemDesc->Allocated > 0)
    {
        pMemDesc->Allocated++;
    }

    memdescAddRef(pMemDesc);
}

static NV_STATUS nvGpuOpsGetChannelInstanceMemInfo(gpuRetainedChannel *retainedChannel,
                                                   gpuChannelInstanceInfo *channelInstanceInfo)
{
    PMEMORY_DESCRIPTOR pMemDesc = NULL;
    NV2080_CTRL_FIFO_MEM_INFO instanceMemInfo;
    NV_STATUS status;
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(retainedChannel->pGpu);
    KernelChannel *pKernelChannel = NULL;
    CHID_MGR *pChidMgr = kfifoGetChidMgr(retainedChannel->pGpu,
                                         pKernelFifo,
                                         retainedChannel->runlistId);

    pKernelChannel = kfifoChidMgrGetKernelChannel(retainedChannel->pGpu,
                                                  pKernelFifo,
                                                  pChidMgr,
                                                  channelInstanceInfo->chId);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);

    status = kfifoChannelGetFifoContextMemDesc_HAL(retainedChannel->pGpu,
                                                   pKernelFifo,
                                                   pKernelChannel,
                                                   FIFO_CTX_INST_BLOCK,
                                                   &pMemDesc);
    if (status != NV_OK)
        return status;

    pMemDesc = memdescGetMemDescFromGpu(pMemDesc, retainedChannel->pGpu);

    kfifoFillMemInfo(pKernelFifo, pMemDesc, &instanceMemInfo);

    if (instanceMemInfo.aperture == NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_INVALID)
        return NV_ERR_INVALID_OBJECT_HANDLE;

    retainedChannel->instanceMemDesc = pMemDesc;
    channelInstanceInfo->base = instanceMemInfo.base;
    channelInstanceInfo->sysmem = (instanceMemInfo.aperture != NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_VIDMEM);

    return NV_OK;
}
static NV_STATUS nvGpuOpsGetChannelTsgInfo(gpuRetainedChannel *retainedChannel,
                                           gpuChannelInstanceInfo *channelInstanceInfo,
                                           KernelChannel *pKernelChannel)
{
    OBJGPU *pGpu = retainedChannel->pGpu;
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    KernelChannelGroup *pKernelChannelGroup;
    NvHandle hDupTsg;
    NvU32 tsgMaxSubctxCount;
    NV_STATUS status;
    NvBool bLockAcquire = NV_FALSE;

    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
    pKernelChannelGroup = pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup;
    NV_ASSERT_OR_RETURN(pKernelChannelGroup != NULL, NV_ERR_INVALID_STATE);

    if (rmGpuLockIsOwner())
    {
        rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
        bLockAcquire = NV_TRUE;
    }

    //
    // Take a reference on the TSG ID by duping the TSG. Note that this is
    // the easy way out because we dup more than strictly necessary: every
    // channel registered under the same TSG will re-dup that TSG. In
    // practice there's very little overhead to re-duping the TSG for each
    // channel.
    //
    hDupTsg = NV01_NULL_OBJECT;
    status = pRmApi->DupObject(pRmApi,
                               retainedChannel->session->handle,
                               retainedChannel->rmDevice->deviceHandle,
                               &hDupTsg,
                               RES_GET_CLIENT_HANDLE(pKernelChannel),
                               RES_GET_HANDLE(pKernelChannel->pKernelChannelGroupApi),
                               NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
    if (status != NV_OK)
    {
        if (bLockAcquire)
        {
            NV_ASSERT_OK_OR_RETURN(rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GPU_OPS));
        }
        return status;
    }

    if (bLockAcquire)
    {
        if ((status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GPU_OPS)) != NV_OK)
        {
            pRmApi->Free(pRmApi, retainedChannel->session->handle, hDupTsg);
            return status;
        }
    }

    tsgMaxSubctxCount = kfifoChannelGroupGetLocalMaxSubcontext_HAL(
        pGpu, pKernelFifo,
        pKernelChannelGroup,
        pKernelChannelGroup->bLegacyMode);

    channelInstanceInfo->bTsgChannel = NV_TRUE;
    channelInstanceInfo->tsgId = pKernelChannelGroup->grpID;
    channelInstanceInfo->tsgMaxSubctxCount = tsgMaxSubctxCount;

    retainedChannel->hDupTsg = hDupTsg;

    return NV_OK;
}

static NV_STATUS nvGpuOpsGetChannelSmcInfo(gpuRetainedChannel *retainedChannel,
                                           gpuChannelInstanceInfo *channelInstanceInfo,
                                           KernelChannel *pKernelChannel,
                                           struct gpuDevice *device)
{
    channelInstanceInfo->smcEngineId = 0;
    channelInstanceInfo->smcEngineVeIdOffset = 0;

    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    if (isDeviceAmperePlus(device) && retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR)
    {
        OBJGPU *pGpu = retainedChannel->pGpu;

        if (IS_MIG_IN_USE(pGpu))
        {
            NvU32 grFaultId;
            NvU32 grMmuFaultEngId;

            const NvU32 grIdx = RM_ENGINE_TYPE_GR_IDX(kchannelGetEngineType(pKernelChannel));

            NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu,
                                                            GPU_GET_KERNEL_FIFO(pGpu),
                                                            ENGINE_INFO_TYPE_ENG_DESC,
                                                            ENG_GR(grIdx),
                                                            ENGINE_INFO_TYPE_MMU_FAULT_ID,
                                                            &grFaultId));

            grMmuFaultEngId = kgmmuGetGraphicsEngineId_HAL(GPU_GET_KERNEL_GMMU(pGpu));
            NV_ASSERT(grFaultId >= grMmuFaultEngId);

            channelInstanceInfo->smcEngineId = grIdx;
            channelInstanceInfo->smcEngineVeIdOffset = grFaultId - grMmuFaultEngId;
        }
    }

    return NV_OK;
}
static void nvGpuOpsGetChannelSubctxInfo(gpuRetainedChannel *retainedChannel,
                                         gpuChannelInstanceInfo *channelInstanceInfo,
                                         KernelChannel *pKernelChannel)
{
    OBJGPU *pGpu = retainedChannel->pGpu;
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    NvHandle hDupKernelCtxShare = NV01_NULL_OBJECT;
    RM_API *pRmApi;
    NV_STATUS status = NV_OK;

    NV_ASSERT_OR_RETURN_VOID(pKernelChannel != NULL);

    pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    //
    // Subcontexts are parented by the TSG, so we must have a reference on the
    // TSG in order to retain the subcontext. The exception is if this channel
    // was allocated without a TSG, in which case RM creates an internal TSG
    // and subcontext which we shouldn't attempt to retain. In that case, we
    // will have skipped duping the TSG earlier and hDupTsg == 0.
    //
    // pKernelChannelGroup->bLegacyMode means that the subcontext was created
    // internally by RM, not by the user.
    //
    if (kfifoIsSubcontextSupported(pKernelFifo) &&
        pKernelChannel->pKernelCtxShareApi &&
        retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR &&
        retainedChannel->hDupTsg &&
        !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bLegacyMode)
    {
        status = pRmApi->DupObject(pRmApi,
                                   retainedChannel->session->handle,
                                   retainedChannel->hDupTsg,
                                   &hDupKernelCtxShare,
                                   RES_GET_CLIENT_HANDLE(pKernelChannel),
                                   RES_GET_HANDLE(pKernelChannel->pKernelCtxShareApi),
                                   NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);

        NV_ASSERT(status == NV_OK);
        retainedChannel->hDupKernelCtxShare = hDupKernelCtxShare;

        //
        // Faults report the VEID (aka subcontext ID), so we need to retain
        // the subcontext ID. We do that by taking a reference on the entire
        // subcontext object.
        //
        // pKernelCtxShare->pShareData points to the broadcast kctxshare data
        // object; we get the VEID for this retained channel's GPU through it.
        // It might be better to go through the handle we just duped, but it
        // is not clear how to do so.
        //
        channelInstanceInfo->subctxId = pKernelChannel->pKernelCtxShareApi->pShareData->subctxId;
        channelInstanceInfo->bInSubctx = NV_TRUE;

        // Make sure that we saw our GPU
        NV_ASSERT(channelInstanceInfo->bInSubctx);
        NV_ASSERT(channelInstanceInfo->subctxId < channelInstanceInfo->tsgMaxSubctxCount);
    }
    else
    {
        channelInstanceInfo->subctxId = 0;
        channelInstanceInfo->bInSubctx = NV_FALSE;
    }
}
//
// This function verifies that the instance pointer of the retainedChannel
// still refers to a valid channel.
//
static NV_STATUS nvGpuOpsGetChannelData(gpuRetainedChannel *retainedChannel,
                                        KernelChannel **ppKernelChannel)
{
    NV2080_CTRL_FIFO_MEM_INFO instanceMemInfo;
    INST_BLOCK_DESC inst;
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(retainedChannel->pGpu);

    kfifoFillMemInfo(pKernelFifo, retainedChannel->instanceMemDesc, &instanceMemInfo);

    switch (instanceMemInfo.aperture)
    {
        case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_SYSMEM_COH:
            inst.aperture = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
            break;
        case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_SYSMEM_NCOH:
            inst.aperture = NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY;
            break;
        case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_VIDMEM:
            inst.aperture = NV_MMU_PTE_APERTURE_VIDEO_MEMORY;
            break;
        default:
            return NV_ERR_INVALID_CHANNEL;
    }

    inst.address = instanceMemInfo.base;
    inst.gfid = GPU_GFID_PF; // Run in VF context w/o GFID

    return kfifoConvertInstToKernelChannel_HAL(retainedChannel->pGpu,
                                               pKernelFifo,
                                               &inst,
                                               ppKernelChannel);
}
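//
// A sketch of the retained-channel lifecycle implemented below (descriptive
// only; the exact call sites live in the UVM driver):
//
//     gpuRetainedChannel *channel;
//     gpuChannelInstanceInfo info;
//
//     nvGpuOpsRetainChannel(vaSpace, hClient, hChannel, &channel, &info);
//     nvGpuOpsBindChannelResources(channel, bindParams); // no-op for CE
//     ...                                                // channel runs
//     nvGpuOpsStopChannel(channel, NV_FALSE);
//     nvGpuOpsReleaseChannel(channel);
//
// Retaining takes references on the TSG, subcontext and context buffers so
// the IDs and memory stay valid even if the user frees its own handles.
//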
NV_STATUS nvGpuOpsRetainChannel(struct gpuAddressSpace *vaSpace,
                                NvHandle hClient,
                                NvHandle hKernelChannel,
                                gpuRetainedChannel **retainedChannel,
                                gpuChannelInstanceInfo *channelInstanceInfo)
{
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    RsClient *pClient;
    KernelChannel *pKernelChannel = NULL;
    OBJGPU *pGpu = NULL;
    gpuRetainedChannel *channel = NULL;
    NV_STATUS status = NV_OK;
    struct gpuDevice *device;
    subDeviceDesc *rmSubDevice;
    NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS params = {0};
    NV_UVM_CHANNEL_RETAINER_ALLOC_PARAMS channelRetainerParams = {0};
    RM_API *pRmApi = NULL;
    NvHandle hChannelParent = 0;

    if (!vaSpace || !channelInstanceInfo)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      hClient,
                                      &pClient,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    device = vaSpace->device;
    rmSubDevice = device->rmSubDevice;

    status = nvGpuOpsVerifyChannel(vaSpace, pClient, hKernelChannel, &pGpu,
                                   &pKernelChannel);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    portMemSet(channelInstanceInfo, 0, sizeof(*channelInstanceInfo));

    channel = portMemAllocNonPaged(sizeof(*channel));
    if (channel == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    portMemSet(channel, 0, sizeof(*channel));
    channel->device = device;
    channel->rmDevice = device->rmDevice;
    channel->rmSubDevice = rmSubDevice;
    channel->session = device->session;
    channel->pGpu = pGpu;

    channelInstanceInfo->runlistId = kchannelGetRunlistId(pKernelChannel);
    channelInstanceInfo->chId = pKernelChannel->ChID;
    channel->chId = pKernelChannel->ChID;
    channel->runlistId = kchannelGetRunlistId(pKernelChannel);

    status = nvGpuOpsGetChannelEngineType(pGpu, pKernelChannel, &channel->channelEngineType);
    if (status != NV_OK)
        goto error;

    status = nvGpuOpsGetChannelInstanceMemInfo(channel, channelInstanceInfo);
    if (status != NV_OK)
        goto error;

    status = nvGpuOpsGetChannelTsgInfo(channel, channelInstanceInfo,
                                       pKernelChannel);
    if (status != NV_OK)
        goto error;

    status = nvGpuOpsGetChannelSmcInfo(channel, channelInstanceInfo,
                                       pKernelChannel, device);
    if (status != NV_OK)
        goto error;

    nvGpuOpsGetChannelSubctxInfo(channel, channelInstanceInfo, pKernelChannel);

    pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (channelInstanceInfo->bTsgChannel)
        hChannelParent = channel->hDupTsg;
    else
        hChannelParent = channel->rmDevice->deviceHandle;

    channelRetainerParams.hClient = hClient;
    channelRetainerParams.hChannel = hKernelChannel;

    NV_PRINTF(LEVEL_INFO, "%s:Channel duping is not supported. Fall back to UVM_CHANNEL_RETAINER\n",
              __FUNCTION__);

    status = pRmApi->Alloc(pRmApi,
                           device->session->handle,
                           hChannelParent,
                           &channel->hChannelRetainer,
                           UVM_CHANNEL_RETAINER,
                           &channelRetainerParams,
                           sizeof(channelRetainerParams));
    if (status != NV_OK)
        goto error;

    // Now get the token for submission on the given channel.
    status = pRmApi->Control(pRmApi,
                             hClient,
                             hKernelChannel,
                             NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN,
                             &params,
                             sizeof(params));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                  __LINE__, nvstatusToString(status));
        goto error;
    }

    //
    // On Turing+ GPUs, the CLEAR_FAULTED method requires an RM-provided
    // handle to identify the channel.
    //
    // TODO: Bug 1905719: We are currently using the channel handle that is
    // used for the work submission usermode doorbell mechanism. However, the
    // values may differ in the future, so we may need a dedicated API to get
    // the channel handle for CLEAR_FAULTED in RM.
    //
    channelInstanceInfo->clearFaultedToken = params.workSubmitToken;

    if (isDeviceAmperePlus(device))
    {
        void *bar0Mapping = gpuBar0BaseAddress(pGpu);
        NvU32 chramPri;
        NvU32 runlistPri;

        NV_ASSERT_OK_OR_GOTO(status, kfifoEngineInfoXlate_HAL(pGpu,
                                                              GPU_GET_KERNEL_FIFO(pGpu),
                                                              ENGINE_INFO_TYPE_RUNLIST,
                                                              kchannelGetRunlistId(pKernelChannel),
                                                              ENGINE_INFO_TYPE_CHRAM_PRI_BASE,
                                                              &chramPri), error);

        chramPri += NV_CHRAM_CHANNEL(pKernelChannel->ChID);

        channelInstanceInfo->pChramChannelRegister = (NvU32 *)((NvU8*)bar0Mapping + chramPri);

        NV_ASSERT_OK_OR_GOTO(status, kfifoEngineInfoXlate_HAL(pGpu,
                                                              GPU_GET_KERNEL_FIFO(pGpu),
                                                              ENGINE_INFO_TYPE_RUNLIST,
                                                              kchannelGetRunlistId(pKernelChannel),
                                                              ENGINE_INFO_TYPE_RUNLIST_PRI_BASE,
                                                              &runlistPri), error);

        channelInstanceInfo->pRunlistPRIBaseRegister = (NvU32 *)((NvU8*)bar0Mapping + runlistPri);
    }

    status = _nvGpuOpsRetainChannelResources(device,
                                             hClient,
                                             hKernelChannel,
                                             channel,
                                             channelInstanceInfo);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                  __LINE__, nvstatusToString(status));
        goto error;
    }

    channelInstanceInfo->channelEngineType = channel->channelEngineType;
    *retainedChannel = channel;

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return NV_OK;

error:
    _nvGpuOpsReleaseChannel(channel);
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
static void _nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel)
{
    NV_STATUS status = NV_OK;
    struct gpuSession *session;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!retainedChannel)
        return;

    _nvGpuOpsReleaseChannelResources(retainedChannel);

    session = retainedChannel->session;
    NV_ASSERT(session);

    if (retainedChannel->hChannelRetainer)
    {
        status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hChannelRetainer);
        NV_ASSERT(status == NV_OK);
    }

    //
    // Release the subcontext if we retained it. Subcontexts are parented by
    // the TSG, so we must release the subcontext before releasing the TSG.
    //
    if (retainedChannel->hDupKernelCtxShare)
    {
        NV_ASSERT(retainedChannel->hDupTsg);
        status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hDupKernelCtxShare);
        NV_ASSERT(status == NV_OK);
    }

    if (retainedChannel->hDupTsg)
    {
        status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hDupTsg);
        NV_ASSERT(status == NV_OK);
    }

    //
    // Releasing the channel ID can only fail if the ID is no longer valid,
    // which indicates a bug elsewhere.
    //
    NV_ASSERT(status == NV_OK);

    portMemFree(retainedChannel);
}

void nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel)
{
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    // TODO: can we lock fewer GPUs by using the channel information?
    if (_nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                 retainedChannel->session->handle,
                                 NULL,
                                 &acquiredLocks) != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return;
    }

    _nvGpuOpsReleaseChannel(retainedChannel);
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
}
static void
_shadowMemdescDestroy(gpuRetainedChannel *retainedChannel,
                      MEMORY_DESCRIPTOR *pMemDesc)
{
    if (pMemDesc->RefCount == 1)
    {
        mapRemoveByKey(&retainedChannel->device->kern2PhysDescrMap, (NvU64) pMemDesc);
    }

    memdescDestroy(pMemDesc);
}

NvBool _memDescFindAndRetain(gpuRetainedChannel *retainedChannel,
                             MEMORY_DESCRIPTOR *pBufferHandle,
                             MEMORY_DESCRIPTOR **ppMemDesc)
{
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    MemdescMapIter iter = mapIterAll(&retainedChannel->device->kern2PhysDescrMap);

    while (mapIterNext(&iter))
    {
        MEMORY_DESCRIPTOR **ppValue = iter.pValue;

        if (pBufferHandle == *ppValue)
        {
            NvU64 key = mapKey(&retainedChannel->device->kern2PhysDescrMap, ppValue);
            pMemDesc = (MEMORY_DESCRIPTOR *) key;
            break;
        }
    }

    if (pMemDesc != NULL)
    {
        _memdescRetain(pMemDesc);
        *ppMemDesc = pMemDesc;
        return NV_TRUE;
    }

    return NV_FALSE;
}

static NV_STATUS
_shadowMemdescCreateFlcn(gpuRetainedChannel *retainedChannel,
                         NV2080_CTRL_FLCN_GET_CTX_BUFFER_INFO_PARAMS *pCtxBufferInfo,
                         MEMORY_DESCRIPTOR **ppMemDesc)
{
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    MEMORY_DESCRIPTOR *pBufferHandle = (MEMORY_DESCRIPTOR *) pCtxBufferInfo->bufferHandle;
    NV_STATUS status = NV_OK;

    NV_ASSERT_OR_RETURN(pCtxBufferInfo->bIsContigous, NV_ERR_INVALID_STATE);

    if (_memDescFindAndRetain(retainedChannel, pBufferHandle, ppMemDesc))
        return status;

    status = memdescCreate(&pMemDesc,
                           retainedChannel->pGpu,
                           pCtxBufferInfo->size,
                           pCtxBufferInfo->alignment,
                           pCtxBufferInfo->bIsContigous,
                           pCtxBufferInfo->aperture,
                           NV_MEMORY_CACHED,
                           MEMDESC_FLAGS_NONE);
    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);

    memdescSetPageSize(pMemDesc, 0, pCtxBufferInfo->pageSize);

    memdescDescribe(pMemDesc, pCtxBufferInfo->aperture, pCtxBufferInfo->physAddr, pCtxBufferInfo->size);

    (void) mapInsertValue(&retainedChannel->device->kern2PhysDescrMap,
                          (NvU64) pMemDesc,
                          &pBufferHandle);
    *ppMemDesc = pMemDesc;

    return status;
}
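//
// Illustrative note on kern2PhysDescrMap (a sketch of the scheme used by the
// helpers above and below): the map associates each shadow MEMORY_DESCRIPTOR
// created here (the key) with the RM-internal buffer handle it mirrors (the
// value). _memDescFindAndRetain() walks the values so that two channels
// sharing one context buffer get the same shadow memdesc with a bumped
// refcount, and _shadowMemdescDestroy() removes the entry once the last
// reference is dropped:
//
//     key:   (NvU64) shadow memdesc  ->  value: RM bufferHandle
//     find:  reverse lookup by value, then _memdescRetain() on the key
//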
static NV_STATUS
_shadowMemdescCreate(gpuRetainedChannel *retainedChannel,
                     NV2080_CTRL_GR_CTX_BUFFER_INFO *pCtxBufferInfo,
                     MEMORY_DESCRIPTOR **ppMemDesc)
{
    NvU32 j;
    NvU64 pageSize = pCtxBufferInfo->pageSize;
    NvU32 numBufferPages = NV_ROUNDUP(pCtxBufferInfo->size, pageSize) / pageSize;
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    MEMORY_DESCRIPTOR *pBufferHandle = (MEMORY_DESCRIPTOR *) pCtxBufferInfo->bufferHandle;
    NV2080_CTRL_KGR_GET_CTX_BUFFER_PTES_PARAMS *pParams = NULL;
    NvU64 *pPages = NULL;
    NV_STATUS status = NV_OK;
    KernelChannel *pKernelChannel;
    RM_API *pRmApi;

    if (_memDescFindAndRetain(retainedChannel, pBufferHandle, ppMemDesc))
        goto done;

    pPages = portMemAllocNonPaged(sizeof(*pPages) * numBufferPages);
    if (pPages == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
    if (status != NV_OK)
    {
        goto done;
    }

    pParams = portMemAllocNonPaged(sizeof(*pParams));
    if (pParams == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    portMemSet(pParams, 0, sizeof(*pParams));

    pParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
    pParams->hChannel = RES_GET_HANDLE(pKernelChannel);
    pParams->bufferType = pCtxBufferInfo->bufferType;

    pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    for (j = 0; j < numBufferPages;)
    {
        pParams->firstPage = j;
        status = pRmApi->Control(pRmApi,
                                 retainedChannel->session->handle,
                                 retainedChannel->rmSubDevice->subDeviceHandle,
                                 NV2080_CTRL_CMD_KGR_GET_CTX_BUFFER_PTES,
                                 pParams,
                                 sizeof(*pParams));
        if (status != NV_OK)
        {
            goto done;
        }

        NV_ASSERT(j + pParams->numPages <= numBufferPages);

        if (pCtxBufferInfo->bIsContigous)
        {
            pPages[0] = (NvU64)pParams->physAddrs[0];
            break;
        }

        portMemCopy(&pPages[j], pParams->numPages * sizeof(*pPages),
                    pParams->physAddrs, pParams->numPages * sizeof(*pPages));
        j += pParams->numPages;
    }

    NV_ASSERT(pParams->bNoMorePages);

    status = memdescCreate(&pMemDesc,
                           retainedChannel->pGpu,
                           pCtxBufferInfo->size,
                           pCtxBufferInfo->alignment,
                           pCtxBufferInfo->bIsContigous,
                           pCtxBufferInfo->aperture,
                           NV_MEMORY_CACHED,
                           MEMDESC_FLAGS_NONE);
    if (status != NV_OK)
    {
        goto done;
    }

    memdescSetPageSize(pMemDesc, 0, pCtxBufferInfo->pageSize);

    if (pCtxBufferInfo->bIsContigous)
    {
        memdescDescribe(pMemDesc, pCtxBufferInfo->aperture, pPages[0], pCtxBufferInfo->size);
    }
    else
    {
        memdescFillPages(pMemDesc, 0, pPages, numBufferPages, pCtxBufferInfo->pageSize);
    }

    (void) mapInsertValue(&retainedChannel->device->kern2PhysDescrMap,
                          (NvU64) pMemDesc,
                          &pBufferHandle);
    *ppMemDesc = pMemDesc;

done:
    portMemFree(pParams);
    portMemFree(pPages);
    return status;
}
static NV_STATUS _nvGpuOpsRetainChannelResources(struct gpuDevice *device,
                                                 NvHandle hClient,
                                                 NvHandle hKernelChannel,
                                                 gpuRetainedChannel *retainedChannel,
                                                 gpuChannelInstanceInfo *channelInstanceInfo)
{
    NV_STATUS status = NV_OK;
    NV2080_CTRL_GR_GET_CTX_BUFFER_INFO_PARAMS *pParams = NULL;
    NV2080_CTRL_FLCN_GET_CTX_BUFFER_INFO_PARAMS *pFlcnParams = NULL;
    gpuChannelResourceInfo *channelResourceInfo = channelInstanceInfo->resourceInfo;
    KernelChannel *pKernelChannel;
    RM_API *pRmApi;
    NvU32 channelEngineType = retainedChannel->channelEngineType;
    NvU32 i;
    NvU32 j;

    NV_ASSERT(channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE ||
              channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR ||
              channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2);

    // CE channels have 0 resources, so they skip this step
    if (channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE)
    {
        goto done;
    }

    status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
    if (status != NV_OK)
    {
        goto done;
    }

    if (channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2)
    {
        // Get the engine context memdesc, then get its PTEs.
        MEMORY_DESCRIPTOR *pMemDesc = NULL;

        // SEC2 channels have a single context buffer.
        NV_ASSERT_OR_GOTO(NV_ARRAY_ELEMENTS(channelInstanceInfo->resourceInfo) >= 1, done);

        pFlcnParams = portMemAllocNonPaged(sizeof(*pFlcnParams));
        if (pFlcnParams == NULL)
        {
            status = NV_ERR_NO_MEMORY;
            goto done;
        }
        pFlcnParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
        pFlcnParams->hChannel = RES_GET_HANDLE(pKernelChannel);

        pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

        // This RM CTRL refcounts all the resource memdescs.
        status = pRmApi->Control(pRmApi,
                                 retainedChannel->session->handle,
                                 retainedChannel->rmSubDevice->subDeviceHandle,
                                 NV2080_CTRL_CMD_FLCN_GET_CTX_BUFFER_INFO,
                                 pFlcnParams,
                                 sizeof(*pFlcnParams));
        if (status != NV_OK)
            goto done;

        gpuMemoryInfo *pGpuMemoryInfo = &channelResourceInfo[0].resourceInfo;

        channelResourceInfo[0].resourceDescriptor = pFlcnParams->bufferHandle;
        channelResourceInfo[0].alignment = pFlcnParams->alignment;
        pGpuMemoryInfo->pageSize = pFlcnParams->pageSize;
        pGpuMemoryInfo->size = pFlcnParams->size;
        pGpuMemoryInfo->contig = pFlcnParams->bIsContigous;
        pGpuMemoryInfo->physAddr = pFlcnParams->physAddr;
        pGpuMemoryInfo->kind = pFlcnParams->kind;
        pGpuMemoryInfo->sysmem = pFlcnParams->aperture == ADDR_SYSMEM;
        pGpuMemoryInfo->deviceDescendant = pFlcnParams->bDeviceDescendant;

        portMemCopy(pGpuMemoryInfo->uuid.uuid, sizeof(pGpuMemoryInfo->uuid.uuid),
                    pFlcnParams->uuid, sizeof(pFlcnParams->uuid));

        status = _shadowMemdescCreateFlcn(retainedChannel, pFlcnParams, &pMemDesc);
        if (status != NV_OK)
            goto done;

        channelResourceInfo[0].resourceDescriptor = (NvP64) pMemDesc;
        retainedChannel->resourceMemDesc[0] = pMemDesc;

        channelInstanceInfo->resourceCount = 1;
        retainedChannel->resourceCount = 1;
        goto done;
    }

    pParams = portMemAllocNonPaged(sizeof(*pParams));
    if (pParams == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    pParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
    pParams->hChannel = RES_GET_HANDLE(pKernelChannel);

    pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    // This RM CTRL refcounts all the resource memdescs.
    status = pRmApi->Control(pRmApi,
                             retainedChannel->session->handle,
                             retainedChannel->rmSubDevice->subDeviceHandle,
                             NV2080_CTRL_CMD_GR_GET_CTX_BUFFER_INFO,
                             pParams,
                             sizeof(*pParams));
    if (status != NV_OK)
        goto done;

    NV_ASSERT(pParams->bufferCount <= NV_ARRAY_ELEMENTS(channelInstanceInfo->resourceInfo));

    for (i = 0; i < pParams->bufferCount; i++)
    {
        MEMORY_DESCRIPTOR *pMemDesc = NULL;
        NV2080_CTRL_GR_CTX_BUFFER_INFO *pCtxBufferInfo = &pParams->ctxBufferInfo[i];
        gpuMemoryInfo *pGpuMemoryInfo = &channelResourceInfo[i].resourceInfo;

        channelResourceInfo[i].resourceDescriptor = pCtxBufferInfo->bufferHandle;
        channelResourceInfo[i].resourceId = pCtxBufferInfo->bufferType;
        channelResourceInfo[i].alignment = pCtxBufferInfo->alignment;
        pGpuMemoryInfo->pageSize = pCtxBufferInfo->pageSize;
        pGpuMemoryInfo->size = pCtxBufferInfo->size;
        pGpuMemoryInfo->contig = pCtxBufferInfo->bIsContigous;
        pGpuMemoryInfo->physAddr = pCtxBufferInfo->physAddr;
        pGpuMemoryInfo->kind = pCtxBufferInfo->kind;
        pGpuMemoryInfo->sysmem = pCtxBufferInfo->aperture == ADDR_SYSMEM;
        pGpuMemoryInfo->deviceDescendant = pCtxBufferInfo->bDeviceDescendant;

        portMemCopy(pGpuMemoryInfo->uuid.uuid, sizeof(pGpuMemoryInfo->uuid.uuid),
                    pCtxBufferInfo->uuid, sizeof(pCtxBufferInfo->uuid));

        status = _shadowMemdescCreate(retainedChannel, pCtxBufferInfo, &pMemDesc);
        if (status != NV_OK)
            goto cleanup;

        channelResourceInfo[i].resourceDescriptor = (NvP64) pMemDesc;
        retainedChannel->resourceMemDesc[i] = pMemDesc;
    }

    channelInstanceInfo->resourceCount = pParams->bufferCount;
    retainedChannel->resourceCount = pParams->bufferCount;

cleanup:
    if (status != NV_OK)
    {
        for (j = 0; j < i; j++)
        {
            _shadowMemdescDestroy(retainedChannel, retainedChannel->resourceMemDesc[j]);
        }
    }

done:
    portMemFree(pParams);
    portMemFree(pFlcnParams);
    return status;
}

static void _nvGpuOpsReleaseChannelResources(gpuRetainedChannel *retainedChannel)
{
    NvU32 i;
    NvU32 descriptorCount = retainedChannel->resourceCount;

    for (i = 0; i < descriptorCount; i++)
    {
        MEMORY_DESCRIPTOR *pMemDesc = retainedChannel->resourceMemDesc[i];

        _shadowMemdescDestroy(retainedChannel, pMemDesc);
    }
}
NV_STATUS nvGpuOpsGetChannelResourcePtes(struct gpuAddressSpace *vaSpace,
                                         NvP64 resourceDescriptor,
                                         NvU64 offset,
                                         NvU64 size,
                                         gpuExternalMappingInfo *pGpuExternalMappingInfo)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    PMEMORY_DESCRIPTOR pMemDesc = NULL;
    OBJGPU *pMappingGpu = NULL;
    OBJVASPACE *pVAS = NULL;
    RsClient *pClient;
    Subdevice *pSubDevice;

    if (!vaSpace || !resourceDescriptor || !pGpuExternalMappingInfo)
        return NV_ERR_INVALID_ARGUMENT;

    if (pGpuExternalMappingInfo->mappingPageSize != 0)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      vaSpace->device->session->handle,
                                      &pClient,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    pMemDesc = (MEMORY_DESCRIPTOR *) NvP64_VALUE(resourceDescriptor);

    status = subdeviceGetByHandle(pClient, vaSpace->device->subhandle, &pSubDevice);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    pMappingGpu = GPU_RES_GET_GPU(pSubDevice);

    GPU_RES_SET_THREAD_BC_STATE(pSubDevice);

    if (pMemDesc->pGpu != pMappingGpu)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return NV_ERR_NOT_SUPPORTED;
    }

    // Mapping is only supported for sysmem and vidmem allocations.
    if ((memdescGetAddressSpace(pMemDesc) != ADDR_SYSMEM) &&
        (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM))
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return NV_ERR_NOT_SUPPORTED;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pMemDesc, NULL,
                                            offset, size, NV_FALSE, NV_FALSE,
                                            0, pGpuExternalMappingInfo);

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
NV_STATUS nvGpuOpsBindChannelResources(gpuRetainedChannel *retainedChannel,
                                       gpuChannelResourceBindParams *channelResourceBindParams)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV2080_CTRL_GPU_PROMOTE_CTX_PARAMS *pParams;
    NvU32 i;
    KernelChannel *pKernelChannel = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      retainedChannel->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    // Register channel resources. CE channels have 0 resources, so they skip
    // this step.
    if (retainedChannel->resourceCount != 0)
    {
        RM_ENGINE_TYPE rmEngineType;

        pParams = portMemAllocNonPaged(sizeof(*pParams));
        if (pParams == NULL)
        {
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return NV_ERR_NO_MEMORY;
        }

        portMemSet(pParams, 0, sizeof(*pParams));

        pParams->hChanClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
        pParams->hObject = RES_GET_HANDLE(pKernelChannel);
        pParams->entryCount = retainedChannel->resourceCount;

        status = kfifoEngineInfoXlate_HAL(retainedChannel->pGpu,
                                          GPU_GET_KERNEL_FIFO(retainedChannel->pGpu),
                                          ENGINE_INFO_TYPE_RUNLIST,
                                          retainedChannel->runlistId,
                                          ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
                                          (NvU32 *)&rmEngineType);

        pParams->engineType = gpuGetNv2080EngineType(rmEngineType);

        for (i = 0; i < retainedChannel->resourceCount; i++)
        {
            if (RM_ENGINE_TYPE_IS_GR(rmEngineType))
                pParams->promoteEntry[i].bufferId = channelResourceBindParams[i].resourceId;

            pParams->promoteEntry[i].gpuVirtAddr = channelResourceBindParams[i].resourceVa;
        }

        status = pRmApi->Control(pRmApi,
                                 retainedChannel->session->handle,
                                 retainedChannel->rmSubDevice->subDeviceHandle,
                                 NV2080_CTRL_CMD_GPU_PROMOTE_CTX,
                                 pParams,
                                 sizeof(*pParams));

        portMemFree(pParams);
    }

    if (NV_OK == status)
    {
        pKernelChannel->bIsContextBound = NV_TRUE;
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}
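//
// Illustrative pairing expected by nvGpuOpsBindChannelResources() above
// (sketch only): the caller passes one gpuChannelResourceBindParams entry per
// resource reported by nvGpuOpsRetainChannel(), matching each resourceId from
// gpuChannelInstanceInfo::resourceInfo with the GPU VA where the caller
// mapped that buffer:
//
//     for (i = 0; i < info.resourceCount; i++)
//     {
//         bind[i].resourceId = info.resourceInfo[i].resourceId;
//         bind[i].resourceVa = /* VA of the caller's mapping of buffer i */;
//     }
//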
9048 // nvGpuOpsRetainChannelResources only increments the ref-counts of the memdescs under the channel.
9049 // It does not prevent the user from freeing the associated hClient and hChannel handles, which means
9050 // the instance pointer may no longer be associated with a user object at this point.
9051 // If the instance pointer still has an associated channel, the channel is preempted and disabled.
9052 // Otherwise that must have already happened, so we just need to drop the ref-counts on the resources.
9053 void nvGpuOpsStopChannel(gpuRetainedChannel *retainedChannel,
9054                          NvBool bImmediate)
9055 {
9056     NV_STATUS status = NV_OK;
9057     nvGpuOpsLockSet acquiredLocks;
9058     THREAD_STATE_NODE threadState;
9059     KernelChannel *pKernelChannel = NULL;
9060     RsResourceRef *pResourceRef;
9061     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
9062     NVA06F_CTRL_STOP_CHANNEL_PARAMS stopChannelParams = {0};
9063 
9064     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
9065 
9066     if (_nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
9067                                  retainedChannel->session->handle,
9068                                  NULL,
9069                                  &acquiredLocks) != NV_OK)
9070     {
9071         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
9072         return;
9073     }
9074 
9075     status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel);
9076     if (status != NV_OK)
9077     {
9078         _nvGpuOpsLocksRelease(&acquiredLocks);
9079         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
9080         return;
9081     }
9082 
9083     // Verify this channel handle is still valid.
9084     status = serverutilGetResourceRef(RES_GET_CLIENT_HANDLE(pKernelChannel), RES_GET_HANDLE(pKernelChannel), &pResourceRef);
9085     if (status != NV_OK)
9086     {
9087         NV_ASSERT(0);
9088         _nvGpuOpsLocksRelease(&acquiredLocks);
9089         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
9090         return;
9091     }
9092 
9093     stopChannelParams.bImmediate = bImmediate;
9094     NV_ASSERT_OK(
9095         pRmApi->Control(pRmApi,
9096                         RES_GET_CLIENT_HANDLE(pKernelChannel),
9097                         RES_GET_HANDLE(pKernelChannel),
9098                         NVA06F_CTRL_CMD_STOP_CHANNEL,
9099                         &stopChannelParams,
9100                         sizeof(stopChannelParams)));
9101 
9102     pKernelChannel->bIsContextBound = NV_FALSE;
9103 
9104     if (retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR)
9105     {
9106         NV2080_CTRL_GPU_EVICT_CTX_PARAMS params;
9107 
9108         portMemSet(&params, 0, sizeof(params));
9109         params.engineType = NV2080_ENGINE_TYPE_GR(0);
9110         params.hClient = retainedChannel->session->handle;
9111         params.hChanClient = RES_GET_CLIENT_HANDLE(pKernelChannel);
9112         params.hObject = RES_GET_HANDLE(pKernelChannel);
9113 
9114         NV_ASSERT_OK(
9115             pRmApi->Control(pRmApi,
9116                             retainedChannel->session->handle,
9117                             retainedChannel->rmSubDevice->subDeviceHandle,
9118                             NV2080_CTRL_CMD_GPU_EVICT_CTX,
9119                             &params,
9120                             sizeof(params)));
9121     }
9122 
9123     _nvGpuOpsLocksRelease(&acquiredLocks);
9124     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
9125 }
9126 
9127 // Make sure the UVM and PMA structs are in sync.
9128 // The following location(s) need to be synced as well:
9129 // - uvm8_pmm_gpu.c:uvm8_test_pmm_query_pma_stats
9130 ct_assert(sizeof(UvmPmaStatistics) == sizeof(PMA_STATS));
9131 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numPages2m) == NV_OFFSETOF(PMA_STATS, num2mbPages));
9132 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages64k) == NV_OFFSETOF(PMA_STATS, numFreeFrames));
9133 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages2m) == NV_OFFSETOF(PMA_STATS, numFree2mbPages));
9134 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numPages2mProtected) == NV_OFFSETOF(PMA_STATS, num2mbPagesProtected));
9135 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages64kProtected) == NV_OFFSETOF(PMA_STATS, numFreeFramesProtected));
9136 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages2mProtected) == NV_OFFSETOF(PMA_STATS, numFree2mbPagesProtected));
9137 
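// The asserts above are what make the cast at the bottom of
// nvGpuOpsGetPmaObject() below safe: because UvmPmaStatistics and PMA_STATS
// agree in size and field offsets, a PMA_STATS pointer can be handed to UVM
// as a read-only UvmPmaStatistics view without copying. Illustrative sketch
// (not compiled; naming follows the code below):
#if 0
    PMA_STATS *pRmStats = &pHeap->pmaObject.pmaStats;
    const UvmPmaStatistics *pUvmView = (const UvmPmaStatistics *)pRmStats;
    // pUvmView->numPages2m now aliases pRmStats->num2mbPages, and so on
    // for every field pair checked by the ct_assert()s above.
#endif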
9138 /*!
9139  * Retrieve the PMA (Physical Memory Allocator) object initialized by RM
9140  * for the given device.
9141  *
9142  * @param[in]  device      Device handle obtained in a prior call
9143  *                         to nvGpuOpsRmDeviceCreate.
9144  *
9145  * @param[out] pPmaObject  Void pointer to the RM PMA object of the associated
9146  *                         GPU; NULL if PMA is not enabled and initialized.
9147  * @param[out] pPmaStats   Pointer to the UVM PMA statistics object of the
9148  *                         associated GPU. Cannot be NULL.
9149  *
9150  * @returns NV_OK on success,
9151  *          NV_ERR_INVALID_ARGUMENT if pPmaObject is NULL,
9152  *          NV_ERR_OBJECT_NOT_FOUND if the PMA object is not found,
9153  *          NV_ERR_NOT_SUPPORTED if PMA is not supported.
9154  */
9155 NV_STATUS nvGpuOpsGetPmaObject(struct gpuDevice *device,
9156                                void **pPmaObject,
9157                                const UvmPmaStatistics **pPmaStats)
9158 {
9159     nvGpuOpsLockSet acquiredLocks;
9160     THREAD_STATE_NODE threadState;
9161     OBJGPU *pGpu = NULL;
9162     Heap *pHeap = NULL;
9163     MemoryManager *pMemoryManager;
9164     struct gpuSession *session = device->session;
9165     NV_STATUS status;
9166 
9167     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
9168     status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, session->handle, NULL, &acquiredLocks);
9169     if (status != NV_OK)
9170     {
9171         threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
9172         return status;
9173     }
9174 
9175     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
9176         CliSetGpuContext(session->handle, device->handle, &pGpu, NULL),
9177         done);
9178 
9179     pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
9180     NV_CHECK_OR_ELSE(LEVEL_ERROR,
9181         pMemoryManager != NULL,
9182         status = NV_ERR_OBJECT_NOT_FOUND; goto done; );
9183 
9184     if (IS_MIG_IN_USE(pGpu))
9185     {
9186         KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
9187         RsClient *pClient;
9188         Device *pDevice;
9189 
9190         NV_ASSERT_OK_OR_GOTO(status,
9191             serverGetClientUnderLock(&g_resServ, session->handle, &pClient),
9192             done);
9193 
9194         NV_ASSERT_OK_OR_GOTO(status,
9195             deviceGetByHandle(pClient, device->handle, &pDevice),
9196             done);
9197 
9198         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
9199             kmigmgrGetMemoryPartitionHeapFromDevice(pGpu, pKernelMIGManager, pDevice, &pHeap),
9200             done);
9201     }
9202     else
9203         pHeap = GPU_GET_HEAP(pGpu);
9204 
9205     NV_CHECK_OR_ELSE(LEVEL_ERROR,
9206         pHeap != NULL,
9207         status = NV_ERR_OBJECT_NOT_FOUND; goto done; );
9208 
9209     NV_CHECK_OR_ELSE(LEVEL_ERROR,
9210         memmgrIsPmaInitialized(pMemoryManager),
9211         status = NV_ERR_OBJECT_NOT_FOUND; goto done; );
9212 
9213     *pPmaObject = (void *)&pHeap->pmaObject;
9214     *pPmaStats = (const UvmPmaStatistics *)&pHeap->pmaObject.pmaStats;
9215 
9216 done:
9217     _nvGpuOpsLocksRelease(&acquiredLocks);
9218     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
9219     return status;
9220 }
9221 
9222 NV_STATUS nvGpuOpsP2pObjectCreate(struct gpuDevice *device1,
9223                                   struct gpuDevice *device2,
9224                                   NvHandle *hP2pObject)
9225 {
9226     NV_STATUS status;
9227     NV503B_ALLOC_PARAMETERS p2pAllocParams = {0};
9228     NvHandle hTemp = 0;
9229     struct systemP2PCaps p2pCaps;
9230     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
9231     struct gpuSession *session;
9232 
9233     if (!device1 || !device2 || !hP2pObject)
9234         return NV_ERR_INVALID_ARGUMENT;
9235 
9236     if (device1->session != device2->session)
9237         return NV_ERR_INVALID_ARGUMENT;
9238 
9239     status = getSystemP2PCaps(device1, device2, &p2pCaps);
9240     if (status != NV_OK)
9241         return status;
9242 
9243     if (!p2pCaps.accessSupported)
9244         return NV_ERR_NOT_SUPPORTED;
9245 
9246     p2pAllocParams.hSubDevice = device1->subhandle;
9247     p2pAllocParams.hPeerSubDevice = device2->subhandle;
9248 
9249     session = device1->session;
9250     hTemp = NV01_NULL_OBJECT;
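    // Allocate the P2P object under the session's root client. The RM API
    // Alloc() call takes (hClient, hParent, phObject, class, params, size):
    // here both the client and the parent are the session handle, hTemp is
    // NV01_NULL_OBJECT so RM picks a fresh handle, and NV50_P2P selects the
    // P2P mapping class whose parameters name the two peer subdevices set up
    // above.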
9251     status = pRmApi->Alloc(pRmApi, session->handle, session->handle, &hTemp, NV50_P2P, &p2pAllocParams, sizeof(p2pAllocParams));
9252     if (status == NV_OK)
9253         *hP2pObject = hTemp;
9254 
9255     return status;
9256 }
9257 
9258 NV_STATUS nvGpuOpsP2pObjectDestroy(struct gpuSession *session,
9259                                    NvHandle hP2pObject)
9260 {
9261     NV_STATUS status = NV_OK;
9262     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
9263     NV_ASSERT(session);
9264 
9265     status = pRmApi->Free(pRmApi, session->handle, hP2pObject);
9266     NV_ASSERT(status == NV_OK);
9267     return status;
9268 }
9269 
9270 NV_STATUS nvGpuOpsReportNonReplayableFault(struct gpuDevice *device,
9271                                            const void *pFaultPacket)
9272 {
9273     NV_STATUS status = NV_OK;
9274     NV2080_CTRL_GPU_REPORT_NON_REPLAYABLE_FAULT_PARAMS params;
9275     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
9276 
9277     if (device == NULL || pFaultPacket == NULL)
9278         return NV_ERR_INVALID_ARGUMENT;
9279 
9280     portMemSet(&params, 0, sizeof(params));
9281 
9282     portMemCopy(&params.faultPacket.data,
9283                 NV2080_CTRL_GPU_FAULT_PACKET_SIZE,
9284                 pFaultPacket,
9285                 NV2080_CTRL_GPU_FAULT_PACKET_SIZE);
9286 
9287     status = pRmApi->Control(pRmApi,
9288                              device->session->handle,
9289                              device->subhandle,
9290                              NV2080_CTRL_CMD_GPU_REPORT_NON_REPLAYABLE_FAULT,
9291                              &params,
9292                              sizeof(params));
9293     if (status != NV_OK)
9294     {
9295         NV_PRINTF(LEVEL_ERROR,
9296                   "%s: NV2080_CTRL_CMD_GPU_REPORT_NON_REPLAYABLE_FAULT returned error %s!\n",
9297                   __FUNCTION__, nvstatusToString(status));
9298     }
9299 
9300     return status;
9301 }
9302 
9303 NV_STATUS nvGpuOpsPagingChannelAllocate(struct gpuDevice *device,
9304                                         const gpuPagingChannelAllocParams *params,
9305                                         gpuPagingChannelHandle *channelHandle,
9306                                         gpuPagingChannelInfo *channelInfo)
9307 {
9308     NV_STATUS status, status2;
9309     UvmGpuPagingChannel *channel = NULL;
9310     Device *pDevice;
9311     RsClient *pClient;
9312     NvHandle hClient;
9313     NvLength errorNotifierSize;
9314     NvU64 paOffset;
9315     gpuAllocInfo allocInfo = {0};
9316     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
9317     NvU32 pid = osGetCurrentProcess();
9318 
9319     if (!device || !params || !channelHandle || !channelInfo)
9320         return NV_ERR_INVALID_ARGUMENT;
9321 
9322     if (!NV2080_ENGINE_TYPE_IS_COPY(NV2080_ENGINE_TYPE_COPY(params->engineIndex)))
9323         return NV_ERR_INVALID_ARGUMENT;
9324 
9325     hClient = device->session->handle;
9326     NV_ASSERT(hClient);
9327 
9328     channel = portMemAllocNonPaged(sizeof(*channel));
9329     if (!channel)
9330         return NV_ERR_NO_MEMORY;
9331 
9332     portMemSet(channel, 0, sizeof(*channel));
9333     channel->device = device;
9334 
9335     errorNotifierSize = sizeof(NvNotification) *
9336                         NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1;
9337     status = nvGpuOpsAllocPhysical(device,
9338                                    NV_TRUE,
9339                                    errorNotifierSize,
9340                                    &paOffset,
9341                                    &allocInfo);
9342     if (status != NV_OK)
9343         goto cleanup_free_channel;
9344 
9345     channel->errorNotifierHandle = allocInfo.hPhysHandle;
9346     NV_ASSERT(channel->errorNotifierHandle);
9347 
9348     status = pRmApi->MapToCpu(pRmApi,
9349                               hClient,
9350                               device->subhandle,
9351                               channel->errorNotifierHandle,
9352                               0,
9353                               errorNotifierSize,
9354                               (void **)&channel->errorNotifier,
9355                               0);
9356     if (status != NV_OK)
9357         goto cleanup_free_error_notifier;
9358 
9359     NV_ASSERT(channel->errorNotifier);
9360 
9361     // Ideally, we would need to acquire three locks here (in that order):
9362     // a. RM API lock
9363     // b. device->handle GPU lock
9364     // c. RPC lock
9365     // (b) The GPU lock is optional because RM will acquire all needed locks automatically.
9366     // (c) The RPC lock is optional because currently there is no scenario in which channel allocation/destruction
9367     // can be run concurrently with any other SR-IOV heavy API that results in an RPC (Map/Unmap/PushStream).
9368     //
9369     // However, if we acquire GPU locks, NV_RM_RPC_UVM_PAGING_CHANNEL_ALLOCATE would fail.
9370     // That is because PAGING_CHANNEL_ALLOCATE allocates AMPERE_CHANNEL_GPFIFO_A, which in turn allocates
9371     // KernelChannelGroupApi. KernelChannelGroupApi would fail because
9372     // 'TSG alloc should be called without acquiring GPU lock'.
9373     // KernelChannelGroupApi acquires GPU locks manually after allocating the TSG.
9374     //
9375     // The TSG allocation requirement just described not only precludes the acquisition
9376     // of any GPU lock in this function, but also the acquisition of the RPC lock,
9377     // because that would result in a lock order violation: the RPC lock is acquired
9378     // before the GPU lock. As a result, nvGpuOpsPagingChannelAllocate only acquires
9379     // the RM API lock, and so does nvGpuOpsPagingChannelDestroy.
9380     status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
9381     if (status != NV_OK)
9382         goto cleanup_unmap_error_notifier;
9383 
9384     status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
9385     if (status != NV_OK)
9386         goto cleanup_under_rmapi_lock;
9387 
9388     status = deviceGetByHandle(pClient, device->handle, &pDevice);
9389     if (status != NV_OK)
9390         goto cleanup_under_rmapi_lock;
9391 
9392     channel->pDevice = pDevice;
9393 
9394     GPU_RES_SET_THREAD_BC_STATE(pDevice);
9395 
9396     if (status != NV_OK)
9397         goto cleanup_under_rmapi_lock;
9398 
9399     rmapiLockRelease();
9400 
9401     *channelHandle = channel;
9402 
9403     channelInfo->shadowErrorNotifier = channel->errorNotifier;
9404 
9405     return NV_OK;
9406 
9407 cleanup_under_rmapi_lock:
9408     rmapiLockRelease();
9409 
9410 cleanup_unmap_error_notifier:
9411     status2 = pRmApi->UnmapFromCpu(pRmApi,
9412                                    hClient,
9413                                    device->subhandle,
9414                                    channel->errorNotifierHandle,
9415                                    (void *)channel->errorNotifier,
9416                                    0,
9417                                    pid);
9418     NV_ASSERT(status2 == NV_OK);
9419 
9420 cleanup_free_error_notifier:
9421     pRmApi->Free(pRmApi, hClient, channel->errorNotifierHandle);
9422 
9423 cleanup_free_channel:
9424     portMemFree(channel);
9425 
9426     return status;
9427 }
9428 
9429 void nvGpuOpsPagingChannelDestroy(UvmGpuPagingChannel *channel)
9430 {
9431     NV_STATUS status;
9432     struct gpuDevice *device;
9433     Device *pDevice;
9434     RsClient *pClient;
9435     NvHandle hClient;
9436     RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
9437     NvU32 pid = osGetCurrentProcess();
9438 
9439     NV_ASSERT(channel);
9440 
9441     device = channel->device;
9442     NV_ASSERT(device);
9443 
9444     hClient = device->session->handle;
9445     NV_ASSERT(hClient);
9446 
9447     // We acquire only the RM API lock here. See the comment in nvGpuOpsPagingChannelAllocate.
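    // Teardown mirrors the tail of nvGpuOpsPagingChannelAllocate: resolve the
    // client and device under the RM API lock to restore the broadcast thread
    // state, then unmap and free the error notifier and the channel struct
    // itself. Failures past this point are asserted and logged but not fatal,
    // since the cleanup labels below still release everything that was
    // successfully created.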
9448 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS); 9449 NV_ASSERT(status == NV_OK); 9450 if (status != NV_OK) 9451 { 9452 NV_PRINTF(LEVEL_ERROR, 9453 "%s: rmapiLockAcquire returned error %s!\n", 9454 __FUNCTION__, nvstatusToString(status)); 9455 goto cleanup; 9456 } 9457 9458 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 9459 NV_ASSERT(status == NV_OK); 9460 if (status != NV_OK) 9461 { 9462 NV_PRINTF(LEVEL_ERROR, 9463 "%s: serverGetClientUnderLock returned error %s!\n", 9464 __FUNCTION__, nvstatusToString(status)); 9465 goto cleanup_under_rmapi_lock; 9466 } 9467 9468 status = deviceGetByHandle(pClient, device->handle, &pDevice); 9469 NV_ASSERT(status == NV_OK); 9470 if (status != NV_OK) 9471 { 9472 NV_PRINTF(LEVEL_ERROR, 9473 "%s: deviceGetByHandle returned error %s!\n", 9474 __FUNCTION__, nvstatusToString(status)); 9475 goto cleanup_under_rmapi_lock; 9476 } 9477 9478 GPU_RES_SET_THREAD_BC_STATE(pDevice); 9479 9480 cleanup_under_rmapi_lock: 9481 rmapiLockRelease(); 9482 9483 cleanup: 9484 status = pRmApi->UnmapFromCpu(pRmApi, 9485 hClient, 9486 device->subhandle, 9487 channel->errorNotifierHandle, 9488 (void *)channel->errorNotifier, 9489 0, 9490 pid); 9491 NV_ASSERT(status == NV_OK); 9492 if (status != NV_OK) 9493 { 9494 NV_PRINTF(LEVEL_ERROR, 9495 "%s: UnmapFromCpu returned error %s!\n", 9496 __FUNCTION__, nvstatusToString(status)); 9497 } 9498 9499 pRmApi->Free(pRmApi, hClient, channel->errorNotifierHandle); 9500 portMemFree(channel); 9501 } 9502 9503 NV_STATUS nvGpuOpsPagingChannelsMap(struct gpuAddressSpace *srcVaSpace, 9504 NvU64 srcAddress, 9505 struct gpuDevice *device, 9506 NvU64 *dstAddress) 9507 { 9508 NV_STATUS status; 9509 Device *pDevice; 9510 RsClient *pClient; 9511 NvHandle hAllocation; 9512 NvHandle hClient; 9513 nvGpuOpsLockSet acquiredLocks; 9514 9515 if (!srcVaSpace || !device || !dstAddress) 9516 return NV_ERR_INVALID_ARGUMENT; 9517 9518 hClient = device->session->handle; 9519 NV_ASSERT(hClient); 9520 9521 9522 status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &hAllocation); 9523 if (status != NV_OK) 9524 return status; 9525 9526 status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_NONE, hClient, NULL, 2, 9527 device->deviceInstance, srcVaSpace->device->deviceInstance, &acquiredLocks); 9528 if (status != NV_OK) 9529 return status; 9530 9531 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 9532 if (status != NV_OK) 9533 goto exit_under_locks; 9534 9535 status = deviceGetByHandle(pClient, device->handle, &pDevice); 9536 if (status != NV_OK) 9537 goto exit_under_locks; 9538 9539 GPU_RES_SET_THREAD_BC_STATE(pDevice); 9540 9541 portSyncMutexAcquire(device->pPagingChannelRpcMutex); 9542 9543 portSyncMutexRelease(device->pPagingChannelRpcMutex); 9544 9545 exit_under_locks: 9546 _nvGpuOpsLocksRelease(&acquiredLocks); 9547 9548 return status; 9549 } 9550 9551 void nvGpuOpsPagingChannelsUnmap(struct gpuAddressSpace *srcVaSpace, 9552 NvU64 srcAddress, 9553 struct gpuDevice *device) 9554 { 9555 NV_STATUS status; 9556 Device *pDevice; 9557 RsClient *pClient; 9558 NvHandle hAllocation; 9559 NvHandle hClient; 9560 nvGpuOpsLockSet acquiredLocks; 9561 9562 NV_ASSERT(srcVaSpace && device); 9563 if (!srcVaSpace || !device) 9564 return; 9565 9566 hClient = device->session->handle; 9567 NV_ASSERT(hClient); 9568 9569 status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &hAllocation); 9570 NV_ASSERT(status == NV_OK); 9571 if (status != NV_OK) 9572 { 9573 NV_PRINTF(LEVEL_ERROR, 9574 "%s: 
getHandleForVirtualAddr returned error %s!\n", 9575 __FUNCTION__, nvstatusToString(status)); 9576 return; 9577 } 9578 9579 status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_NONE, hClient, NULL, 2, 9580 device->deviceInstance, srcVaSpace->device->deviceInstance, &acquiredLocks); 9581 if (status != NV_OK) 9582 { 9583 NV_PRINTF(LEVEL_ERROR, 9584 "%s: _nvGpuOpsLocksAcquire returned error %s!\n", 9585 __FUNCTION__, nvstatusToString(status)); 9586 return; 9587 } 9588 9589 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 9590 NV_ASSERT(status == NV_OK); 9591 if (status != NV_OK) 9592 { 9593 NV_PRINTF(LEVEL_ERROR, 9594 "%s: serverGetClientUnderLock returned error %s!\n", 9595 __FUNCTION__, nvstatusToString(status)); 9596 goto exit_under_locks; 9597 } 9598 9599 status = deviceGetByHandle(pClient, device->handle, &pDevice); 9600 NV_ASSERT(status == NV_OK); 9601 if (status != NV_OK) 9602 { 9603 NV_PRINTF(LEVEL_ERROR, 9604 "%s: deviceGetByHandle returned error %s!\n", 9605 __FUNCTION__, nvstatusToString(status)); 9606 goto exit_under_locks; 9607 } 9608 9609 GPU_RES_SET_THREAD_BC_STATE(pDevice); 9610 9611 portSyncMutexAcquire(device->pPagingChannelRpcMutex); 9612 9613 portSyncMutexRelease(device->pPagingChannelRpcMutex); 9614 9615 exit_under_locks: 9616 _nvGpuOpsLocksRelease(&acquiredLocks); 9617 } 9618 9619 NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel, 9620 char *methodStream, 9621 NvU32 methodStreamSize) 9622 { 9623 NV_STATUS status = NV_OK; 9624 struct gpuDevice *device = NULL; 9625 9626 if (!channel || !methodStream) 9627 return NV_ERR_INVALID_ARGUMENT; 9628 if (methodStreamSize == 0) 9629 return NV_OK; 9630 9631 device = channel->device; 9632 NV_ASSERT(device); 9633 9634 GPU_RES_SET_THREAD_BC_STATE(channel->pDevice); 9635 9636 portSyncMutexAcquire(device->pPagingChannelRpcMutex); 9637 9638 portSyncMutexRelease(device->pPagingChannelRpcMutex); 9639 9640 return status; 9641 } 9642 9643 static NV_STATUS nvGpuOpsGetMemoryByHandle(NvHandle hClient, NvHandle hMemory, Memory **ppMemory) 9644 { 9645 RsClient *pRsClient = NULL; 9646 9647 NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, 9648 hClient, &pRsClient)); 9649 9650 return memGetByHandle(pRsClient, 9651 hMemory, 9652 ppMemory); 9653 } 9654 9655 NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx, 9656 struct gpuChannel *channel) 9657 { 9658 if ((ctx == NULL) || (channel == NULL)) 9659 { 9660 return NV_ERR_INVALID_ARGUMENT; 9661 } 9662 9663 return ccslContextInitViaChannel(ctx, channel->tsg->vaSpace->device->session->handle, channel->channelHandle); 9664 } 9665 9666 NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx) 9667 { 9668 if (ctx == NULL) 9669 { 9670 return NV_ERR_INVALID_ARGUMENT; 9671 } 9672 9673 ccslContextClear(ctx); 9674 return NV_OK; 9675 } 9676 9677 NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx, NvU8 direction) 9678 { 9679 if (ctx == NULL) 9680 { 9681 return NV_ERR_INVALID_ARGUMENT; 9682 } 9683 9684 return ccslRotateIv(ctx, direction); 9685 } 9686 9687 NV_STATUS nvGpuOpsCcslEncryptWithIv(struct ccslContext_t *ctx, 9688 NvU32 bufferSize, 9689 NvU8 const *inputBuffer, 9690 NvU8 *encryptIv, 9691 NvU8 *outputBuffer, 9692 NvU8 *authTagBuffer) 9693 { 9694 if (ctx == NULL) 9695 { 9696 return NV_ERR_INVALID_ARGUMENT; 9697 } 9698 return ccslEncryptWithIv(ctx, bufferSize, inputBuffer, encryptIv, NULL, 0, 9699 outputBuffer, authTagBuffer); 9700 } 9701 9702 NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx, 9703 NvU32 bufferSize, 9704 NvU8 const *inputBuffer, 
9705 NvU8 *outputBuffer, 9706 NvU8 *authTagBuffer) 9707 { 9708 if (ctx == NULL) 9709 { 9710 return NV_ERR_INVALID_ARGUMENT; 9711 } 9712 9713 return ccslEncrypt(ctx, bufferSize, inputBuffer, NULL, 0, 9714 outputBuffer, authTagBuffer); 9715 } 9716 9717 NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx, 9718 NvU32 bufferSize, 9719 NvU8 const *inputBuffer, 9720 NvU8 const *decryptIv, 9721 NvU8 *outputBuffer, 9722 NvU8 const *addAuthData, 9723 NvU32 addAuthDataSize, 9724 NvU8 const *authTagBuffer) 9725 { 9726 if (ctx == NULL) 9727 { 9728 return NV_ERR_INVALID_ARGUMENT; 9729 } 9730 9731 return ccslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, addAuthData, addAuthDataSize, 9732 outputBuffer, authTagBuffer); 9733 } 9734 9735 NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx, 9736 NvU32 bufferSize, 9737 NvU8 const *inputBuffer, 9738 NvU8 *authTagBuffer) 9739 { 9740 if (ctx == NULL) 9741 { 9742 return NV_ERR_INVALID_ARGUMENT; 9743 } 9744 9745 return ccslSign(ctx, bufferSize, inputBuffer, authTagBuffer); 9746 } 9747 9748 NV_STATUS nvGpuOpsQueryMessagePool(struct ccslContext_t *ctx, 9749 NvU8 direction, 9750 NvU64 *messageNum) 9751 { 9752 if (ctx == NULL) 9753 { 9754 return NV_ERR_INVALID_ARGUMENT; 9755 } 9756 9757 switch (direction) 9758 { 9759 case UVM_CSL_OPERATION_ENCRYPT: 9760 return ccslQueryMessagePool(ctx, CCSL_DIR_HOST_TO_DEVICE, messageNum); 9761 case UVM_CSL_OPERATION_DECRYPT: 9762 return ccslQueryMessagePool(ctx, CCSL_DIR_DEVICE_TO_HOST, messageNum); 9763 default: 9764 return NV_ERR_INVALID_ARGUMENT; 9765 } 9766 } 9767 9768 NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx, 9769 NvU8 direction, 9770 NvU64 increment, 9771 NvU8 *iv) 9772 { 9773 if (ctx == NULL) 9774 { 9775 return NV_ERR_INVALID_ARGUMENT; 9776 } 9777 9778 switch (direction) 9779 { 9780 case UVM_CSL_OPERATION_ENCRYPT: 9781 return ccslIncrementIv(ctx, CCSL_DIR_HOST_TO_DEVICE, increment, iv); 9782 case UVM_CSL_OPERATION_DECRYPT: 9783 return ccslIncrementIv(ctx, CCSL_DIR_DEVICE_TO_HOST, increment, iv); 9784 default: 9785 return NV_ERR_INVALID_ARGUMENT; 9786 } 9787 } 9788
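// A usage sketch for the CCSL wrappers above: one encrypt/decrypt round trip
// as it might look from the UVM side. Illustrative only and not compiled;
// the buffer size, the source of the decrypt IV, and the 16-byte IV/auth-tag
// widths shown here are assumptions for the example, not guarantees of this
// API.
#if 0
    enum { SIZE = 4096 };
    struct ccslContext_t *ctx;
    NvU8 plainText[SIZE], cipherText[SIZE];
    NvU8 iv[16], tag[16];          // widths assumed for illustration

    // Derive a context from an existing Confidential Compute channel.
    nvGpuOpsCcslContextInit(&ctx, channel);

    // CPU -> GPU direction: encrypt and produce an authentication tag.
    nvGpuOpsCcslEncrypt(ctx, SIZE, plainText, cipherText, tag);

    // GPU -> CPU direction: authenticate and decrypt, with the IV obtained
    // out-of-band from the producer of the ciphertext.
    nvGpuOpsCcslDecrypt(ctx, SIZE, cipherText, iv, plainText, NULL, 0, tag);

    nvGpuOpsCcslContextClear(ctx);
#endif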