/*
 * SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "core/prelude.h"


#include <class/cl0002.h>
#include <class/cl0005.h>
#include <class/cl003e.h> // NV01_MEMORY_SYSTEM
#include <class/cl0040.h> // NV01_MEMORY_LOCAL_USER
#include <class/cl0080.h>
#include <class/cl503b.h>
#include <class/cl50a0.h> // NV50_MEMORY_VIRTUAL
#include <class/cl90e6.h>
#include <class/cl90f1.h>
#include <class/cla06f.h>
#include <class/clb069.h>
#include <class/clb069sw.h>
#include <class/clb06f.h>
#include <class/clb0b5.h>
#include <class/clb0b5sw.h>
#include <class/clb0c0.h>
#include <class/clb1c0.h>
#include <class/clc06f.h>
#include <class/clc076.h>
#include <class/clc0b5.h>
#include <class/clc0c0.h>
#include <class/clc1b5.h>
#include <class/clc1c0.h>
#include <class/clc361.h>
#include <class/clc365.h>
#include <class/clc369.h>
#include <class/clc36f.h>
#include <class/clc3b5.h>
#include <class/clc3c0.h>
#include <class/clc46f.h>
#include <class/clc4c0.h>
#include <class/clc56f.h>
#include <class/clc572.h> // PHYSICAL_CHANNEL_GPFIFO
#include <class/clc574.h> // UVM_CHANNEL_RETAINER
#include <class/clc5b5.h>
#include <class/clc5c0.h>
#include <class/clc637.h>
#include <class/clc6b5.h>
#include <class/clc6c0.h>
#include <class/clc7b5.h>
#include <class/clc7c0.h>
#include <class/clc661.h> // HOPPER_USERMODE_A
#include <class/clc8b5.h> // HOPPER_DMA_COPY_A
#include <class/clcbc0.h> // HOPPER_COMPUTE_A

#include <ctrl/ctrl0000/ctrl0000gpu.h>
#include <ctrl/ctrl0000/ctrl0000system.h>
#include <ctrl/ctrl0080/ctrl0080fifo.h>
#include <ctrl/ctrl0080/ctrl0080gpu.h>
#include <ctrl/ctrl2080/ctrl2080fb.h>
#include <ctrl/ctrl2080/ctrl2080fifo.h>
#include <ctrl/ctrl2080/ctrl2080gpu.h>
#include <ctrl/ctrl2080/ctrl2080gr.h>
#include <ctrl/ctrl90e6.h>
#include <ctrl/ctrl90f1.h>
#include <ctrl/ctrla06f.h>
#include <ctrl/ctrlb069.h>
#include <ctrl/ctrlc365.h>
#include <ctrl/ctrlc369.h>
#include <ctrl/ctrlc36f.h>

#include <ampere/ga100/dev_runlist.h>
#include <containers/queue.h>
#include <core/locks.h>
#include <gpu/bus/kern_bus.h>
#include <gpu/device/device.h>
#include <gpu/gpu.h>
#include <gpu/mem_mgr/heap.h>
#include <gpu/mem_mgr/mem_mgr.h>
#include <gpu/mem_mgr/virt_mem_allocator.h>
#include <gpu/mem_sys/kern_mem_sys.h>
#include <gpu/mmu/kern_gmmu.h>
#include <gpu/subdevice/subdevice.h>
#include <gpu_mgr/gpu_mgr.h>
#include <kepler/gk104/dev_timer.h>
#include <kernel/gpu/fifo/kernel_channel.h>
#include <kernel/gpu/fifo/kernel_channel_group.h>
#include <kernel/gpu/fifo/kernel_channel_group_api.h>
#include <kernel/gpu/fifo/kernel_ctxshare.h>
#include <kernel/gpu/gr/kernel_graphics.h>
#include <kernel/gpu/mig_mgr/gpu_instance_subscription.h>
#include <kernel/gpu/mig_mgr/kernel_mig_manager.h>
#include <kernel/gpu/nvlink/kernel_nvlink.h>
#include <mem_mgr/fabric_vaspace.h>
#include <mem_mgr/fla_mem.h>
#include <mem_mgr/gpu_vaspace.h>
#include <mem_mgr/vaspace.h>
#include <mmu/gmmu_fmt.h>
#include <nv_uvm_types.h>
#include <objrpc.h>
#include <os/os.h>
#include <resserv/rs_client.h>
#include <rmapi/client.h>
#include <rmapi/nv_gpu_ops.h>
#include <rmapi/rs_utils.h>
#include <turing/tu102/dev_vm.h>
#include <gpu/mem_mgr/vaspace_api.h>
#include <vgpu/rpc.h>

#include <pascal/gp100/dev_mmu.h>

#define NV_GPU_OPS_NUM_GPFIFO_ENTRIES_DEFAULT 1024
#define NV_GPU_SMALL_PAGESIZE (4 * 1024)

#define PAGE_SIZE_DEFAULT UVM_PAGE_SIZE_DEFAULT

typedef struct
{
    NODE btreeNode;
    NvU64 address;
    NvHandle handle;
    NvU64 size;
    // childHandle tightly couples a physical allocation with a VA memdesc.
    // A VA memdesc is considered a parent memdesc, i.e., its childHandle will
    // be non-zero (valid).
    // - If childHandle is non-zero, there is a corresponding PA allocation present.
    // - If childHandle is zero, this is an invalid state for a VA memdesc.
    NvHandle childHandle;
} gpuMemDesc;

typedef struct
{
    NvU64 pageSize;  // default is 4K or 64K; otherwise use pageSize = 2M.
    NvU64 alignment;
} gpuVaAllocInfo;

typedef struct
{
    NODE btreeNode;
    NvU64 cpuPointer;
    NvHandle handle;
} cpuMappingDesc;

typedef struct
{
    NODE btreeNode;
    PORT_RWLOCK *btreeLock;
    NvHandle deviceHandle;
    PNODE subDevices;
    NvU32 subDeviceCount;
    NvU32 arch;
    NvU32 implementation;
} deviceDesc;
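
//
// Descriptor bookkeeping note: a gpuSession tracks deviceDesc nodes in a
// btree keyed by deviceInstance, and each deviceDesc tracks subDeviceDesc
// nodes (below) keyed by subdeviceInstance. Each level is protected by its
// own btreeLock.
//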

typedef struct
{
    NODE btreeNode;
    NvHandle subDeviceHandle;
    NvU64 refCount;
    struct
    {
        NvHandle handle;

        // Pointer to the SMC partition information. It is used as a flag to
        // indicate that the SMC information has been initialized.
        KERNEL_MIG_GPU_INSTANCE *info;
    } smcPartition;
    NvU32 eccOffset;
    NvU32 eccMask;
    void *eccReadLocation;
    NvHandle eccMasterHandle;
    NvHandle eccCallbackHandle;
    NvBool bEccInitialized;
    NvBool bEccEnabled;
    NvBool eccErrorNotifier;
    NVOS10_EVENT_KERNEL_CALLBACK_EX eccDbeCallback;

    // The below is used for controlling channel(s) in the GPU.
    // Example: Volta maps the doorbell work submission register in this
    // region.
    NvHandle clientRegionHandle;
    volatile void *clientRegionMapping;
} subDeviceDesc;

struct gpuSession
{
    NvHandle handle;
    PNODE devices;
    PORT_RWLOCK *btreeLock;
};


MAKE_MAP(MemdescMap, PMEMORY_DESCRIPTOR);

struct gpuDevice
{
    deviceDesc *rmDevice;
    subDeviceDesc *rmSubDevice;

    // same as rmDevice->deviceHandle
    NvHandle handle;

    // same as rmSubDevice->subDeviceHandle
    NvHandle subhandle;

    NvU32 deviceInstance;
    NvU32 subdeviceInstance;
    NvU32 gpuId;
    NvU32 hostClass;
    NvU32 ceClass;
    NvU32 sec2Class;
    NvU32 computeClass;
    NvU32 faultBufferClass;
    NvU32 accessCounterBufferClass;
    NvBool isTccMode;
    NvBool isWddmMode;
    struct gpuSession *session;
    NvU8 gpuUUID[NV_GPU_UUID_LEN];
    gpuFbInfo fbInfo;
    UVM_LINK_TYPE sysmemLink;
    NvU32 sysmemLinkRateMBps;
    NvBool connectedToSwitch;

    MemdescMap kern2PhysDescrMap;

    PORT_MUTEX *pPagingChannelRpcMutex;
};

struct gpuAddressSpace
{
    NvHandle handle;
    struct gpuDevice *device;
    PNODE allocations;
    PORT_RWLOCK *allocationsLock;
    PNODE cpuMappings;
    PORT_RWLOCK *cpuMappingsLock;
    PNODE physAllocations;
    PORT_RWLOCK *physAllocationsLock;
    NvU64 vaBase;
    NvU64 vaSize;
    // Dummy BAR1 allocation required on PCIe systems when GPPut resides in
    // sysmem.
    struct
    {
        NvU64 refCount;
        NvU64 gpuAddr;
        volatile void *cpuAddr;
    } dummyGpuAlloc;
};

struct gpuChannel
{
    NvHandle channelHandle;
    NvU32 hwRunlistId;
    NvU32 hwChannelId;
    UVM_GPU_CHANNEL_ENGINE_TYPE engineType;

    // If engineType is CE, engineIndex is a zero-based offset from
    // RM_ENGINE_TYPE_COPY0. If engineType is GR, engineIndex is a
    // zero-based offset from NV2080_ENGINE_TYPE_GR0.
    NvU32 engineIndex;
    struct gpuAddressSpace *vaSpace;
    NvU64 gpFifo;
    NvNotification *errorNotifier;
    NvU64 errorNotifierOffset;
    NvU64 *gpFifoEntries;
    NvU32 fifoEntries;
    KeplerAControlGPFifo *controlPage;
    struct gpuObject *nextAttachedEngine;
    NvHandle hFaultCancelSwMethodClass;
    volatile unsigned *workSubmissionOffset;
    NvU32 workSubmissionToken;
    volatile NvU32 *pWorkSubmissionToken;
    NvHandle hUserdPhysHandle;
    NvU64 userdGpuAddr;
    UVM_BUFFER_LOCATION gpFifoLoc;
    UVM_BUFFER_LOCATION gpPutLoc;
    NvBool retainedDummyAlloc;
};
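
//
// Work submission note (a sketch based on the fields above and the doorbell
// comment in subDeviceDesc; this is an assumption, not code from this file):
// on Volta+ a channel owner advances GPPut and then rings the doorbell,
// roughly:
//
//     channel->controlPage->GPPut = newGpPut;
//     *channel->workSubmissionOffset = channel->workSubmissionToken;
//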

// Add 3 to include local ctx buffer, patch context buffer and PM ctxsw buffer
ct_assert(UVM_GPU_CHANNEL_MAX_RESOURCES >= (GR_GLOBALCTX_BUFFER_COUNT + 3));

// A retained channel is a user client's channel which has been registered
// with the UVM driver.
struct gpuRetainedChannel_struct
{
    struct gpuDevice *device;
    deviceDesc *rmDevice;
    subDeviceDesc *rmSubDevice;
    struct gpuSession *session;
    OBJGPU *pGpu;
    MEMORY_DESCRIPTOR *instanceMemDesc;
    MEMORY_DESCRIPTOR *resourceMemDesc[UVM_GPU_CHANNEL_MAX_RESOURCES];
    UVM_GPU_CHANNEL_ENGINE_TYPE channelEngineType;
    NvU32 resourceCount;
    NvU32 chId;
    NvU32 runlistId;
    NvU32 grIdx;

    // Dup of user's TSG (if one exists) under our RM client
    NvHandle hDupTsg;

    // Dup to context share object
    NvHandle hDupKernelCtxShare;

    // Handle for object that retains chId and instance mem
    NvHandle hChannelRetainer;
};

struct gpuObject
{
    NvHandle handle;
    NvU32 type;
    struct gpuObject *next;
};

struct allocFlags
{
    NvBool bGetKernelVA;
    NvBool bfixedAddressAllocate;
};

struct ChannelAllocInfo
{
    NV_CHANNEL_ALLOC_PARAMS gpFifoAllocParams;
    gpuAllocInfo gpuAllocInfo;
};

struct systemP2PCaps
{
    // peerId[i] contains gpu[i]'s peer id of gpu[1 - i]
    NvU32 peerIds[2];

    // true if the two GPUs are direct NvLink or PCIe peers
    NvU32 accessSupported : 1;

    // true if the two GPUs are indirect (NvLink) peers
    NvU32 indirectAccessSupported : 1;

    // true if the two GPUs are direct NvLink peers
    NvU32 nvlinkSupported : 1;

    NvU32 atomicSupported : 1;

    // optimalNvlinkWriteCEs[i] contains the index of the optimal CE to use
    // when writing from gpu[i] to gpu[1 - i]
    NvU32 optimalNvlinkWriteCEs[2];
};

static NV_STATUS findUvmAddressSpace(NvHandle hClient, NvU32 gpuInstance, NvHandle *pHandle, OBJVASPACE **ppVaspace);
static NV_STATUS nvGpuOpsGpuMalloc(struct gpuAddressSpace *vaSpace,
                                   NvBool isSystemMemory,
                                   NvLength length,
                                   NvU64 *gpuOffset,
                                   struct allocFlags flags,
                                   gpuAllocInfo *allocInfo);
static NV_STATUS trackDescriptor(PNODE *pRoot, NvU64 key, void *desc);
static NV_STATUS findDescriptor(PNODE pRoot, NvU64 key, void **desc);
static NV_STATUS deleteDescriptor(PNODE *pRoot, NvU64 key, void **desc);
static NV_STATUS destroyAllGpuMemDescriptors(NvHandle hClient, PNODE pNode);
static NV_STATUS getHandleForVirtualAddr(struct gpuAddressSpace *vaSpace,
                                         NvU64 allocationVa,
                                         NvBool bPhysical,
                                         NvHandle *pHandle);
static NV_STATUS findDeviceClasses(NvHandle hRoot,
                                   NvHandle hDevice,
                                   NvHandle hSubdevice,
                                   NvU32 *hostClass,
                                   NvU32 *ceClass,
                                   NvU32 *computeClass,
                                   NvU32 *faultBufferClass,
                                   NvU32 *accessCounterBufferClass,
                                   NvU32 *sec2Class);
static NV_STATUS queryCopyEngines(struct gpuDevice *gpu, gpuCesCaps *cesCaps);
static void nvGpuOpsFreeVirtual(struct gpuAddressSpace *vaSpace,
                                NvU64 vaOffset);
static NvBool isDeviceVoltaPlus(const struct gpuDevice *device);
static NvBool isDeviceTuringPlus(const struct gpuDevice *device);
static NV_STATUS gpuDeviceMapUsermodeRegion(struct gpuDevice *device);
static void gpuDeviceDestroyUsermodeRegion(struct gpuDevice *device);
static void gpuDeviceUnmapCpuFreeHandle(struct gpuDevice *device,
                                        NvHandle handle,
                                        void *ptr,
                                        NvU32 flags);
static NV_STATUS allocNvlinkStatusForSubdevice(struct gpuDevice *device,
                                               NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS **nvlinkStatusOut);
static NvU32 getNvlinkConnectionToNpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
                                      NvBool *atomicSupported,
                                      NvU32 *linkBandwidthMBps);
static NvU32 getNvlinkConnectionToSwitch(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
                                         NvU32 *linkBandwidthMBps);
static NV_STATUS nvGpuOpsGetMemoryByHandle(NvHandle hClient, NvHandle hMemory, Memory **ppMemory);
static void _nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel);
static NV_STATUS _nvGpuOpsRetainChannelResources(struct gpuDevice *device,
                                                 NvHandle hClient,
                                                 NvHandle hKernelChannel,
                                                 gpuRetainedChannel *retainedChannel,
                                                 gpuChannelInstanceInfo *channelInstanceInfo);
static void _nvGpuOpsReleaseChannelResources(gpuRetainedChannel *retainedChannel);

/*
 * This function will lock the RM API lock according to rmApiLockFlags, and
 * then examine numLocksNeeded. If this is 0, no GPU locks will be acquired.
 * If it is 1, the GPU lock for deviceInstance1 will be locked. If it is 2,
 * both GPU locks for deviceInstance1 and deviceInstance2 will be locked. If
 * it is any other number, all the GPU locks will be acquired.
 *
 * This function will attempt to grab the needed GPU locks, and will write the
 * resulting mask into acquiredLocks->gpuMask. In the event of a failure to
 * acquire any needed GPU locks, the written mask is 0 and the function
 * returns NV_ERR_INVALID_LOCK_STATE. In this case, all locks held are
 * released and the caller does not need to release any locks.
 */

typedef struct nvGpuOpsLockSet
{
    NvBool isRmLockAcquired;
    NvBool isRmSemaAcquired;
    GPU_MASK gpuMask;
    RsClient *pClientLocked;
} nvGpuOpsLockSet;

static void _nvGpuOpsLocksRelease(nvGpuOpsLockSet *acquiredLocks)
{
    OBJSYS *pSys;
    pSys = SYS_GET_INSTANCE();

    if (acquiredLocks->gpuMask != 0)
    {
        rmGpuGroupLockRelease(acquiredLocks->gpuMask, GPUS_LOCK_FLAGS_NONE);
        acquiredLocks->gpuMask = 0;
    }

    if (acquiredLocks->pClientLocked != NULL)
    {
        serverReleaseClient(&g_resServ, LOCK_ACCESS_WRITE, acquiredLocks->pClientLocked);
        acquiredLocks->pClientLocked = NULL;
    }

    if (acquiredLocks->isRmLockAcquired == NV_TRUE)
    {
        rmapiLockRelease();
        acquiredLocks->isRmLockAcquired = NV_FALSE;
    }

    if (acquiredLocks->isRmSemaAcquired == NV_TRUE)
    {
        osReleaseRmSema(pSys->pSema, NULL);
        acquiredLocks->isRmSemaAcquired = NV_FALSE;
    }
}

static NV_STATUS _nvGpuOpsLocksAcquire(NvU32 rmApiLockFlags,
                                       NvHandle hClient,
                                       RsClient **ppClient,
                                       NvU32 numLocksNeeded,
                                       NvU32 deviceInstance1,
                                       NvU32 deviceInstance2,
                                       nvGpuOpsLockSet *acquiredLocks)
{
    NV_STATUS status;
    OBJSYS *pSys;
    GPU_MASK gpuMaskRequested;
    GPU_MASK gpuMaskAcquired;

    acquiredLocks->isRmSemaAcquired = NV_FALSE;
    acquiredLocks->isRmLockAcquired = NV_FALSE;
    acquiredLocks->gpuMask = 0;
    acquiredLocks->pClientLocked = NULL;

    pSys = SYS_GET_INSTANCE();
    if (pSys == NULL)
    {
        return NV_ERR_GENERIC;
    }

    status = osAcquireRmSema(pSys->pSema);
    if (status != NV_OK)
    {
        return status;
    }
    acquiredLocks->isRmSemaAcquired = NV_TRUE;

    status = rmapiLockAcquire(rmApiLockFlags, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(acquiredLocks);
        return status;
    }
    acquiredLocks->isRmLockAcquired = NV_TRUE;

    if (hClient != NV01_NULL_OBJECT)
    {
        status = serverAcquireClient(&g_resServ, hClient, LOCK_ACCESS_WRITE, &acquiredLocks->pClientLocked);

        if (status != NV_OK)
        {
            _nvGpuOpsLocksRelease(acquiredLocks);
            return status;
        }

        if (ppClient != NULL)
            *ppClient = acquiredLocks->pClientLocked;
    }

    //
    // Determine the GPU lock mask we need. If we are asked for 0, 1, or 2
    // locks then we should use neither, just the first, or both
    // deviceInstance parameters, respectively. If any other number of locks
    // is requested, we acquire all of the lockable GPUs.
    //
    // We cannot simply determine the mask outside of this function and pass
    // in the mask, because gpumgrGetDeviceGpuMask requires that we hold the
    // RM API lock. Otherwise, SLI rewiring could preempt lock acquisition and
    // render the mask invalid.
    //
    gpuMaskRequested = 0;

    if (numLocksNeeded > 2)
    {
        gpuMaskRequested = GPUS_LOCK_ALL;
    }
    else
    {
        if (numLocksNeeded > 0)
        {
            gpuMaskRequested |= gpumgrGetDeviceGpuMask(deviceInstance1);
        }

        if (numLocksNeeded > 1)
        {
            gpuMaskRequested |= gpumgrGetDeviceGpuMask(deviceInstance2);
        }
    }

    //
    // The gpuMask parameter to rmGpuGroupLockAcquire is both input and
    // output, so we have to copy in what we want here to make comparisons
    // later.
    //
    gpuMaskAcquired = gpuMaskRequested;
    if (gpuMaskRequested != 0)
    {
        status = rmGpuGroupLockAcquire(0, GPU_LOCK_GRP_MASK,
                                       GPUS_LOCK_FLAGS_NONE,
                                       RM_LOCK_MODULES_GPU_OPS, &gpuMaskAcquired);
    }
    acquiredLocks->gpuMask = gpuMaskAcquired;

    //
    // If we cannot acquire all the locks requested, we release all the locks
    // we *were* able to get and bail out here. There is never a safe way to
    // proceed with a GPU ops function with fewer locks than requested. If
    // there was a safe way to proceed, the client should have asked for fewer
    // locks in the first place.
    //
    // That said, callers sometimes want "all available GPUs", and then the
    // call to rmGpuGroupLockAcquire will mask off invalid GPUs for us. Hence
    // the exception for GPUS_LOCK_ALL.
    //
    if (gpuMaskAcquired != gpuMaskRequested && gpuMaskRequested != GPUS_LOCK_ALL)
    {
        status = NV_ERR_INVALID_LOCK_STATE;
    }

    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(acquiredLocks);
    }

    return status;
}

static NV_STATUS _nvGpuOpsLocksAcquireAll(NvU32 rmApiLockFlags,
                                          NvHandle hClient, RsClient **ppClient,
                                          nvGpuOpsLockSet *acquiredLocks)
{
    return _nvGpuOpsLocksAcquire(rmApiLockFlags, hClient, ppClient, 3, 0, 0, acquiredLocks);
}
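
//
// Typical nvGpuOpsLockSet usage (illustrative sketch only):
//
//     nvGpuOpsLockSet acquiredLocks;
//     status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_READ, hClient, NULL,
//                                    1, deviceInstance, 0, &acquiredLocks);
//     if (status != NV_OK)
//         return status;  // acquire already released anything it grabbed
//
//     // ... operate under the RM semaphore, RM API lock, client lock and
//     // the requested GPU lock ...
//
//     _nvGpuOpsLocksRelease(&acquiredLocks);
//
// _nvGpuOpsLocksRelease drops the locks in the reverse order of acquisition:
// GPU locks, then the client lock, then the RM API lock, then the RM
// semaphore.
//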

static NV_STATUS nvGpuOpsCreateClient(RM_API *pRmApi, NvHandle *hClient)
{
    NV_STATUS status;
    RS_SHARE_POLICY sharePolicy;

    *hClient = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
                           hClient, NV01_ROOT, hClient);
    if (status != NV_OK)
    {
        return status;
    }

    // Override default system share policy. Prohibit sharing of any and all
    // objects owned by this client.
    portMemSet(&sharePolicy, 0, sizeof(sharePolicy));
    sharePolicy.type = RS_SHARE_TYPE_ALL;
    sharePolicy.action = RS_SHARE_ACTION_FLAG_REVOKE;
    RS_ACCESS_MASK_ADD(&sharePolicy.accessMask, RS_ACCESS_DUP_OBJECT);

    status = pRmApi->Share(pRmApi, *hClient, *hClient, &sharePolicy);
    if (status != NV_OK)
    {
        pRmApi->Free(pRmApi, *hClient, *hClient);
    }

    return status;
}

NV_STATUS nvGpuOpsCreateSession(struct gpuSession **session)
{
    struct gpuSession *gpuSession = NULL;
    NV_STATUS status;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();

    gpuSession = portMemAllocNonPaged(sizeof(*gpuSession));
    if (gpuSession == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(gpuSession, 0, sizeof(*gpuSession));

    status = nvGpuOpsCreateClient(pRmApi, &gpuSession->handle);
    if (status != NV_OK)
    {
        portMemFree(gpuSession);
        return status;
    }

    gpuSession->devices = NULL;
    gpuSession->btreeLock = portSyncRwLockCreate(pAlloc);
    *session = (gpuSession);
    return status;
}

NV_STATUS nvGpuOpsDestroySession(struct gpuSession *session)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!session)
        return NV_OK;

    // Sanity Check: There should not be any attached devices with the session!
    NV_ASSERT(!session->devices);

    // freeing session will free everything under it
    pRmApi->Free(pRmApi, session->handle, session->handle);
    portSyncRwLockDestroy(session->btreeLock);
    portMemFree(session);
    return NV_OK;
}

static void *gpuBar0BaseAddress(OBJGPU *pGpu)
{
    DEVICE_MAPPING *pMapping = gpuGetDeviceMapping(pGpu, DEVICE_INDEX_GPU, 0);

    NV_ASSERT(pMapping);

    return pMapping->gpuNvAddr;
}

static void eccErrorCallback(void *pArg, void *pData, NvHandle hEvent,
                             NvU32 data, NvU32 status)
{
    subDeviceDesc *rmSubDevice = (subDeviceDesc *)pArg;

    NV_ASSERT(rmSubDevice);

    rmSubDevice->eccErrorNotifier = NV_TRUE;
}

static NvBool deviceNeedsDummyAlloc(struct gpuDevice *device)
{
    // The dummy mapping is needed so the client can issue a read to flush out
    // any CPU BAR1 PCIE writes prior to updating GPPUT. This is only needed
    // when the bus is non-coherent and when not in ZeroFB (where there can't
    // be any BAR1 mappings).
    return device->sysmemLink < UVM_LINK_TYPE_NVLINK_2 && !device->fbInfo.bZeroFb;
}

static NV_STATUS nvGpuOpsVaSpaceRetainDummyAlloc(struct gpuAddressSpace *vaSpace)
{
    struct gpuDevice *device;
    NV_STATUS status = NV_OK;
    gpuAllocInfo allocInfo = {0};
    struct allocFlags flags = {0};

    device = vaSpace->device;
    NV_ASSERT(device);
    NV_ASSERT(deviceNeedsDummyAlloc(device));

    if (vaSpace->dummyGpuAlloc.refCount > 0)
        goto done;

    flags.bGetKernelVA = NV_FALSE;
    status = nvGpuOpsGpuMalloc(vaSpace,
                               NV_FALSE,
                               NV_GPU_SMALL_PAGESIZE,
                               &vaSpace->dummyGpuAlloc.gpuAddr,
                               flags,
                               &allocInfo);
    if (status != NV_OK)
        return status;

    status = nvGpuOpsMemoryCpuMap(vaSpace,
                                  vaSpace->dummyGpuAlloc.gpuAddr,
                                  NV_GPU_SMALL_PAGESIZE,
                                  (void **)&vaSpace->dummyGpuAlloc.cpuAddr,
                                  PAGE_SIZE_DEFAULT);
    if (status != NV_OK)
        nvGpuOpsMemoryFree(vaSpace, vaSpace->dummyGpuAlloc.gpuAddr);

done:
    if (status == NV_OK)
    {
        ++vaSpace->dummyGpuAlloc.refCount;
        NV_ASSERT(vaSpace->dummyGpuAlloc.gpuAddr);
        NV_ASSERT(vaSpace->dummyGpuAlloc.cpuAddr);
    }

    return status;
}

static void nvGpuOpsVaSpaceReleaseDummyAlloc(struct gpuAddressSpace *vaSpace)
{
    NV_ASSERT(deviceNeedsDummyAlloc(vaSpace->device));
    NV_ASSERT(vaSpace->dummyGpuAlloc.refCount != 0);

    if (--vaSpace->dummyGpuAlloc.refCount > 0)
        return;

    if (vaSpace->dummyGpuAlloc.cpuAddr)
        nvGpuOpsMemoryCpuUnMap(vaSpace, (void *)vaSpace->dummyGpuAlloc.cpuAddr);

    if (vaSpace->dummyGpuAlloc.gpuAddr)
        nvGpuOpsMemoryFree(vaSpace, vaSpace->dummyGpuAlloc.gpuAddr);

    vaSpace->dummyGpuAlloc.cpuAddr = NULL;
    vaSpace->dummyGpuAlloc.gpuAddr = 0;
}
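
//
// Intended use of the dummy allocation (illustrative sketch derived from the
// comment in deviceNeedsDummyAlloc; the variable names are hypothetical):
//
//     gpFifoEntries[put] = entry;                               // CPU write to GPFIFO in sysmem
//     (void)*(volatile NvU32 *)vaSpace->dummyGpuAlloc.cpuAddr;  // BAR1 read flushes posted PCIe writes
//     *gpPut = newPut;                                          // now safe to update GPPut
//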

static NV_STATUS nvGpuOpsDisableVaSpaceChannels(struct gpuAddressSpace *vaSpace)
{
    NV_STATUS status = NV_OK;
    OBJVASPACE *pVAS = NULL;
    Device *pDevice;
    RsClient *pClient;
    RS_ORDERED_ITERATOR it;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NV2080_CTRL_FIFO_DISABLE_CHANNELS_PARAMS disableParams = {0};

    if (vaSpace == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
    if (status != NV_OK)
        return status;

    status = deviceGetByHandle(pClient, vaSpace->device->handle, &pDevice);
    if (status != NV_OK)
        return status;

    GPU_RES_SET_THREAD_BC_STATE(pDevice);

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
        return NV_ERR_INVALID_ARGUMENT;

    // Stop all channels under the VAS, but leave them bound.
    it = kchannelGetIter(pClient, RES_GET_REF(pDevice));
    while (clientRefOrderedIterNext(pClient, &it))
    {
        KernelChannel *pKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);

        NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);
        if (pKernelChannel->pVAS != pVAS)
            continue;

        NV_ASSERT_OR_RETURN(disableParams.numChannels < NV2080_CTRL_FIFO_DISABLE_CHANNELS_MAX_ENTRIES, NV_ERR_NOT_SUPPORTED);
        disableParams.hClientList[disableParams.numChannels] = RES_GET_CLIENT_HANDLE(pKernelChannel);
        disableParams.hChannelList[disableParams.numChannels] = RES_GET_HANDLE(pKernelChannel);
        disableParams.numChannels++;
    }

    if (disableParams.numChannels == 0)
        return status;

    disableParams.bDisable = NV2080_CTRL_FIFO_DISABLE_CHANNEL_TRUE;
    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->subhandle,
                             NV2080_CTRL_CMD_FIFO_DISABLE_CHANNELS,
                             &disableParams,
                             sizeof(disableParams));
    return status;
}

static NV_STATUS nvGpuOpsEnableVaSpaceChannels(struct gpuAddressSpace *vaSpace)
{
    NV_STATUS status = NV_OK;
    OBJVASPACE *pVAS = NULL;
    Device *pDevice;
    RsClient *pClient;
    RS_ORDERED_ITERATOR it;
    NV2080_CTRL_FIFO_DISABLE_CHANNELS_PARAMS disableParams = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (vaSpace == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient);
    if (status != NV_OK)
        return status;

    status = deviceGetByHandle(pClient, vaSpace->device->handle, &pDevice);
    if (status != NV_OK)
        return status;

    GPU_RES_SET_THREAD_BC_STATE(pDevice);

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
        return NV_ERR_INVALID_ARGUMENT;

    it = kchannelGetIter(pClient, RES_GET_REF(pDevice));
    while (clientRefOrderedIterNext(pClient, &it))
    {
        KernelChannel *pKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);

        NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);
        if (pKernelChannel->pVAS != pVAS)
            continue;

        NV_ASSERT_OR_RETURN(disableParams.numChannels < NV2080_CTRL_FIFO_DISABLE_CHANNELS_MAX_ENTRIES, NV_ERR_NOT_SUPPORTED);
        disableParams.hClientList[disableParams.numChannels] = RES_GET_CLIENT_HANDLE(pKernelChannel);
        disableParams.hChannelList[disableParams.numChannels] = RES_GET_HANDLE(pKernelChannel);
        disableParams.numChannels++;
    }

    if (disableParams.numChannels == 0)
        return status;

    disableParams.bDisable = NV2080_CTRL_FIFO_DISABLE_CHANNEL_FALSE;
    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->subhandle,
                             NV2080_CTRL_CMD_FIFO_DISABLE_CHANNELS,
                             &disableParams,
                             sizeof(disableParams));
    return status;
}
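
//
// RM device and subdevice descriptors are shared between gpuDevice instances
// of the same session: nvGpuOpsRmDeviceCreate() and nvGpuOpsRmSubDeviceCreate()
// below look the descriptor up in the session's btree first and allocate only
// on a miss, while the destroy paths free the underlying RM objects once
// subDeviceCount and refCount drop to zero.
//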

static NV_STATUS nvGpuOpsRmDeviceCreate(struct gpuDevice *device)
{
    NV_STATUS status;
    NV0080_ALLOC_PARAMETERS nv0080AllocParams = { 0 };
    deviceDesc *rmDevice = NULL;
    struct gpuSession *session = device->session;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();
    OBJGPU *pGpu;

    // Find the existing rmDevice.
    // Otherwise, allocate an rmDevice.
    portSyncRwLockAcquireRead(session->btreeLock);
    status = findDescriptor(session->devices, device->deviceInstance, (void**)&rmDevice);
    portSyncRwLockReleaseRead(session->btreeLock);
    if (status == NV_OK)
    {
        NV_ASSERT(rmDevice);
        device->rmDevice = rmDevice;
        device->handle = rmDevice->deviceHandle;
        return NV_OK;
    }

    rmDevice = portMemAllocNonPaged(sizeof(*rmDevice));
    if (rmDevice == NULL)
        return NV_ERR_INSUFFICIENT_RESOURCES;

    portMemSet(rmDevice, 0, sizeof(*rmDevice));

    nv0080AllocParams.deviceId = device->deviceInstance;
    nv0080AllocParams.hClientShare = session->handle;
    device->handle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           session->handle,
                           &device->handle,
                           NV01_DEVICE_0,
                           &nv0080AllocParams);
    if (status != NV_OK)
        goto cleanup_device_desc;

    device->rmDevice = rmDevice;
    rmDevice->deviceHandle = device->handle;
    rmDevice->subDevices = NULL;
    rmDevice->subDeviceCount = 0;

    portSyncRwLockAcquireWrite(session->btreeLock);
    status = trackDescriptor(&session->devices, device->deviceInstance, rmDevice);
    portSyncRwLockReleaseWrite(session->btreeLock);
    if (status != NV_OK)
        goto cleanup_device;

    // TODO: Acquired because CliGetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        goto cleanup_device;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        goto cleanup_device;

    rmDevice->arch = gpuGetChipArch(pGpu);
    rmDevice->implementation = gpuGetChipImpl(pGpu);
    rmDevice->btreeLock = portSyncRwLockCreate(pAlloc);

    return NV_OK;

cleanup_device:
    pRmApi->Free(pRmApi, session->handle, device->handle);
cleanup_device_desc:
    portMemFree(rmDevice);
    return status;
}

static void nvGpuOpsRmDeviceDestroy(struct gpuDevice *device)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    deviceDesc *rmDevice = device->rmDevice;

    NV_ASSERT(rmDevice != NULL);

    if (rmDevice->subDeviceCount == 0)
    {
        struct gpuSession *session = device->session;
        portSyncRwLockAcquireWrite(session->btreeLock);
        deleteDescriptor(&session->devices, device->deviceInstance, (void**)&rmDevice);
        pRmApi->Free(pRmApi, session->handle, rmDevice->deviceHandle);
        portSyncRwLockDestroy(rmDevice->btreeLock);
        portMemFree(rmDevice);
        portSyncRwLockReleaseWrite(session->btreeLock);
    }
}

static void gpuDeviceRmSubDeviceDeinitEcc(struct gpuDevice *device)
{
    NV2080_CTRL_EVENT_SET_NOTIFICATION_PARAMS eventDbeParams = {0};
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!rmSubDevice->bEccInitialized || !rmSubDevice->bEccEnabled)
        return;

    // Disable all notifications specific to ECC on this device
    eventDbeParams.event = NV2080_NOTIFIERS_ECC_DBE;
    eventDbeParams.action = NV2080_CTRL_EVENT_SET_NOTIFICATION_ACTION_DISABLE;

    pRmApi->Control(pRmApi,
                    device->session->handle,
                    device->subhandle,
                    NV2080_CTRL_CMD_EVENT_SET_NOTIFICATION,
                    (void *)&eventDbeParams,
                    sizeof(eventDbeParams));

    if (!isDeviceTuringPlus(device))
    {
        gpuDeviceUnmapCpuFreeHandle(device,
                                    rmSubDevice->eccMasterHandle,
                                    rmSubDevice->eccReadLocation,
                                    DRF_DEF(OS33, _FLAGS, _ACCESS, _READ_ONLY));
    }

    rmSubDevice->eccReadLocation = NULL;

    if (rmSubDevice->eccCallbackHandle)
        pRmApi->Free(pRmApi, device->session->handle, rmSubDevice->eccCallbackHandle);

    rmSubDevice->bEccEnabled = NV_FALSE;
    rmSubDevice->bEccInitialized = NV_FALSE;
}

//
// Initialize the ECC state for an RM subdevice
//
// This can only be done once per RM subdevice as GF100_SUBDEVICE_MASTER can
// only be allocated once.
//
static NV_STATUS gpuDeviceRmSubDeviceInitEcc(struct gpuDevice *device)
{
    NV_STATUS status = NV_OK;
    NvU32 i = 0;
    int tempPtr = 0;

    struct
    {
        NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS eccStatus;
        NV90E6_CTRL_MASTER_GET_ECC_INTR_OFFSET_MASK_PARAMS eccMask;
        NV90E6_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK_PARAMS errContIntrMask;
        NV2080_CTRL_EVENT_SET_NOTIFICATION_PARAMS eventDbe;
        NV0005_ALLOC_PARAMETERS allocDbe;
    } *pParams = NULL;
    OBJGPU *pGpu = NULL;
    NvBool supportedOnAnyUnits = NV_FALSE;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(device);

    // TODO: Acquired because CliGetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = CliSetGpuContext(device->session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    rmSubDevice->eccOffset = 0;
    rmSubDevice->eccMask = 0;
    rmSubDevice->eccReadLocation = NULL;
    rmSubDevice->eccMasterHandle = 0;
    rmSubDevice->bEccInitialized = NV_FALSE;
    rmSubDevice->bEccEnabled = NV_FALSE;

    // Do not initialize ECC for this device if SMC is enabled, but no partition
    // was subscribed to. This will be the case for select devices created
    // on behalf of the UVM driver.
    if (IS_MIG_IN_USE(pGpu) && rmSubDevice->smcPartition.info == NULL)
        return NV_OK;

    pParams = portMemAllocNonPaged(sizeof(*pParams));
    if (pParams == NULL)
    {
        return NV_ERR_NO_MEMORY;
    }

    portMemSet(pParams, 0, sizeof(*pParams));

    // Check ECC before doing anything here
    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS,
                             &pParams->eccStatus,
                             sizeof(pParams->eccStatus));

    if (status == NV_ERR_NOT_SUPPORTED)
    {
        // Nothing to do if ECC not supported
        rmSubDevice->bEccEnabled = NV_FALSE;
        status = NV_OK;
        goto done;
    }
    else if (status != NV_OK)
    {
        goto done;
    }

    //
    // ECC is considered supported only if it's enabled for all supported
    // units, and there's at least 1 supported unit
    //
    rmSubDevice->bEccEnabled = NV_TRUE;

    for (i = 0; i < NV2080_CTRL_GPU_ECC_UNIT_COUNT; i++)
    {
        // Check the ECC status only on the units supported by HW
        if (pParams->eccStatus.units[i].supported)
        {
            supportedOnAnyUnits = NV_TRUE;
            if (!pParams->eccStatus.units[i].enabled)
                rmSubDevice->bEccEnabled = NV_FALSE;
        }
    }

    if (!supportedOnAnyUnits)
        rmSubDevice->bEccEnabled = NV_FALSE;

    if (!rmSubDevice->bEccEnabled)
    {
        // ECC not enabled, early-out
        status = NV_OK;
        goto done;
    }

    // Allocate memory for interrupt tree
    rmSubDevice->eccMasterHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi, device->session->handle,
                           device->subhandle,
                           &rmSubDevice->eccMasterHandle,
                           GF100_SUBDEVICE_MASTER,
                           &tempPtr);
    if (status != NV_OK)
        goto done;

    if (isDeviceTuringPlus(device))
    {
        rmSubDevice->eccReadLocation = gpuBar0BaseAddress(pGpu);
        status = pRmApi->Control(pRmApi,
                                 device->session->handle,
                                 rmSubDevice->eccMasterHandle,
                                 NV90E6_CTRL_CMD_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK,
                                 &pParams->errContIntrMask,
                                 sizeof(pParams->errContIntrMask));
        if (status != NV_OK)
            goto done;

        rmSubDevice->eccOffset = GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_ERR_CONT);
        rmSubDevice->eccMask = pParams->errContIntrMask.eccMask;
    }
    else
    {
        // Map memory
        status = pRmApi->MapToCpu(pRmApi,
                                  device->session->handle,
                                  device->subhandle,
                                  rmSubDevice->eccMasterHandle, 0,
                                  sizeof(GF100MASTERMap),
                                  (void **)(&rmSubDevice->eccReadLocation),
                                  DRF_DEF(OS33, _FLAGS, _ACCESS, _READ_ONLY));
        if (status != NV_OK)
            goto done;

        NV_ASSERT(rmSubDevice->eccReadLocation);

        status = pRmApi->Control(pRmApi,
                                 device->session->handle,
                                 rmSubDevice->eccMasterHandle,
                                 NV90E6_CTRL_CMD_MASTER_GET_ECC_INTR_OFFSET_MASK,
                                 &pParams->eccMask,
                                 sizeof(pParams->eccMask));
        if (status != NV_OK)
            goto done;

        // Fill the mask and offset which have been read from the control call
        rmSubDevice->eccOffset = pParams->eccMask.offset;
        rmSubDevice->eccMask = pParams->eccMask.mask;
    }

    // Setup callback for ECC DBE
    rmSubDevice->eccDbeCallback.func = eccErrorCallback;
    rmSubDevice->eccDbeCallback.arg = rmSubDevice;

    pParams->allocDbe.hParentClient = device->session->handle;
    pParams->allocDbe.hClass = NV01_EVENT_KERNEL_CALLBACK_EX;
    pParams->allocDbe.notifyIndex = NV2080_NOTIFIERS_ECC_DBE;
    pParams->allocDbe.data = NV_PTR_TO_NvP64(&rmSubDevice->eccDbeCallback);

    rmSubDevice->eccCallbackHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi, device->session->handle,
                           device->subhandle,
                           &rmSubDevice->eccCallbackHandle,
                           NV01_EVENT_KERNEL_CALLBACK_EX,
                           &pParams->allocDbe);

    if (status != NV_OK)
        goto done;

    pParams->eventDbe.event = NV2080_NOTIFIERS_ECC_DBE;
    pParams->eventDbe.action = NV2080_CTRL_EVENT_SET_NOTIFICATION_ACTION_SINGLE;

    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_EVENT_SET_NOTIFICATION,
                             &pParams->eventDbe,
                             sizeof(pParams->eventDbe));
    if (status != NV_OK)
        goto done;

done:
    portMemFree(pParams);

    if (status == NV_OK)
        rmSubDevice->bEccInitialized = NV_TRUE;
    else
        gpuDeviceRmSubDeviceDeinitEcc(device);

    return status;
}
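
//
// Consumption of the ECC state initialized above (a sketch of the intended
// use, not code from this file): the client is expected to poll the register
// at eccOffset within eccReadLocation and test it against eccMask to detect a
// pending ECC interrupt, while eccErrorNotifier is raised asynchronously by
// eccErrorCallback() when a double-bit error event fires.
//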

static NV_STATUS getSwizzIdFromSmcPartHandle(RM_API *pRmApi,
                                             NvHandle hClient,
                                             NvHandle hGPUInstanceSubscription,
                                             NvU32 *swizzId)
{
    NV_STATUS status;
    RsResourceRef *pSmcResourceRef;
    GPUInstanceSubscription *pGPUInstanceSubscription;

    // get GPUInstanceSubscription handle
    // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = serverutilGetResourceRef(hClient, hGPUInstanceSubscription, &pSmcResourceRef);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    pGPUInstanceSubscription = dynamicCast(pSmcResourceRef->pResource, GPUInstanceSubscription);
    if (!pGPUInstanceSubscription)
        return NV_ERR_INVALID_OBJECT;

    *swizzId = pGPUInstanceSubscription->pKernelMIGGpuInstance->swizzId;

    return NV_OK;
}

//
// Determine an SMC partition's swizzId given a user subscription
//
// This requires temporarily duplicating the handle to validate it, as well
// as to prevent removal of the partition for the duration of the look-up.
// However, neither the partition, nor the swizzId uniquely identifying
// it (within the scope of its parent GPU) are guaranteed to remain valid, and
// callers of this function must be prepared for removal of the partition
// between nvGpuOpsGetGpuInfo() and nvGpuOpsDeviceCreate().
//
static NV_STATUS getSwizzIdFromUserSmcPartHandle(RM_API *pRmApi,
                                                 NvHandle hClient,
                                                 NvHandle hParent,
                                                 NvHandle hUserClient,
                                                 NvHandle hUserGPUInstanceSubscription,
                                                 NvU32 *swizzId)
{
    NV_STATUS status;
    NvHandle dupedGPUInstanceSubscription;

    // TODO: Acquired because serverutilGenResourceHandle expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = serverutilGenResourceHandle(hClient, &dupedGPUInstanceSubscription);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    status = pRmApi->DupObject(pRmApi,
                               hClient,
                               hParent,
                               &dupedGPUInstanceSubscription,
                               hUserClient,
                               hUserGPUInstanceSubscription,
                               NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
    if (status != NV_OK)
        return status;

    // get GPUInstanceSubscription handle
    status = getSwizzIdFromSmcPartHandle(pRmApi, hClient, dupedGPUInstanceSubscription,
                                         swizzId);

    pRmApi->Free(pRmApi, hClient, dupedGPUInstanceSubscription);

    return status;
}

static void nvGpuOpsRmSmcPartitionDestroy(struct gpuDevice *device)
{
    subDeviceDesc *rmSubDevice = device->rmSubDevice;

    if (rmSubDevice->smcPartition.info != NULL)
    {
        RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

        pRmApi->Free(pRmApi,
                     device->session->handle,
                     rmSubDevice->smcPartition.handle);

        rmSubDevice->smcPartition.info = NULL;
    }
}
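
//
// Note on partition lifetime: nvGpuOpsRmSmcPartitionCreate() below dups the
// user's GPUInstanceSubscription under our client and re-reads the swizzId;
// holding that dup (smcPartition.handle) is what keeps the partition alive
// for the lifetime of the gpuDevice, per the look-up caveats documented
// above getSwizzIdFromUserSmcPartHandle().
//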

static NV_STATUS nvGpuOpsRmSmcPartitionCreate(struct gpuDevice *device, const gpuInfo *pGpuInfo)
{
    NV_STATUS status;
    OBJGPU *pGpu = NULL;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    NvHandle dupUserHandle;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    struct gpuSession *session = device->session;
    RsResourceRef *pSmcResourceRef;
    GPUInstanceSubscription *pGPUInstanceSubscription;
    NvU32 swizzId;

    NV_ASSERT(rmSubDevice->smcPartition.info == NULL);

    if (!pGpuInfo->smcEnabled)
        return NV_ERR_INVALID_ARGUMENT;

    // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    // Allocate the SMC partition object

    // SMC GPU partitioning was disabled since we detected the partition in
    // nvGpuOpsGetGpuInfo
    if (!IS_MIG_IN_USE(pGpu))
        return NV_ERR_INVALID_STATE;

    status = pRmApi->DupObject(pRmApi,
                               session->handle,
                               rmSubDevice->subDeviceHandle,
                               &dupUserHandle,
                               pGpuInfo->smcUserClientInfo.hClient,
                               pGpuInfo->smcUserClientInfo.hSmcPartRef,
                               NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
    if (status != NV_OK)
        return status;

    status = getSwizzIdFromSmcPartHandle(pRmApi,
                                         session->handle,
                                         dupUserHandle,
                                         &swizzId);
    if (status != NV_OK)
        goto cleanup_dup_user_handle;

    // The swizzId changed since the call to nvGpuOpsGetGpuInfo: either the
    // object identified by smcUser*Handle changed, or else its configuration
    // was altered.
    if (swizzId != pGpuInfo->smcSwizzId)
    {
        status = NV_ERR_INVALID_STATE;
        goto cleanup_dup_user_handle;
    }

    rmSubDevice->smcPartition.handle = dupUserHandle;

    // get GPUInstanceSubscription handle
    // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        goto cleanup_dup_user_handle;
    status = serverutilGetResourceRef(session->handle, rmSubDevice->smcPartition.handle, &pSmcResourceRef);
    rmapiLockRelease();
    if (status != NV_OK)
        goto cleanup_dup_user_handle;

    pGPUInstanceSubscription = dynamicCast(pSmcResourceRef->pResource, GPUInstanceSubscription);
    NV_ASSERT(pGPUInstanceSubscription != NULL);

    NV_ASSERT(pGPUInstanceSubscription->pKernelMIGGpuInstance->swizzId == pGpuInfo->smcSwizzId);

    rmSubDevice->smcPartition.info = pGPUInstanceSubscription->pKernelMIGGpuInstance;

    return NV_OK;

cleanup_dup_user_handle:
    pRmApi->Free(pRmApi, session->handle, dupUserHandle);

    return status;
}

static NV_STATUS nvGpuOpsRmSubDeviceCreate(struct gpuDevice *device)
{
    NV_STATUS status;
    NV2080_ALLOC_PARAMETERS nv2080AllocParams = { 0 };
    deviceDesc *rmDevice = NULL;
    subDeviceDesc *rmSubDevice = NULL;
    struct gpuSession *session = device->session;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    NV_ASSERT(session);

    // Query the rmDevice, which is needed to create an rmSubDevice.
    portSyncRwLockAcquireRead(session->btreeLock);
    status = findDescriptor(session->devices, device->deviceInstance, (void**)&rmDevice);
    if (status != NV_OK)
    {
        portSyncRwLockReleaseRead(session->btreeLock);
        return status;
    }

    NV_ASSERT(rmDevice);
    NV_ASSERT(rmDevice->deviceHandle == device->handle);

    // Find the existing rmSubDevice.
    // Otherwise, allocate an rmSubDevice.
    portSyncRwLockAcquireWrite(rmDevice->btreeLock);
    if (findDescriptor(rmDevice->subDevices, device->subdeviceInstance, (void**)&rmSubDevice) == NV_OK)
    {
        NV_ASSERT(rmSubDevice);
        device->rmSubDevice = rmSubDevice;
        device->subhandle = rmSubDevice->subDeviceHandle;
        rmSubDevice->refCount++;
        portSyncRwLockReleaseWrite(rmDevice->btreeLock);
        portSyncRwLockReleaseRead(session->btreeLock);
        return NV_OK;
    }

    rmSubDevice = portMemAllocNonPaged(sizeof(*rmSubDevice));
    if (rmSubDevice == NULL)
    {
        // Release both locks before bailing out.
        portSyncRwLockReleaseWrite(rmDevice->btreeLock);
        portSyncRwLockReleaseRead(session->btreeLock);
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    portMemSet(rmSubDevice, 0, sizeof(*rmSubDevice));

    device->rmSubDevice = rmSubDevice;
    rmSubDevice->refCount = 1;
    nv2080AllocParams.subDeviceId = device->subdeviceInstance;
    device->subhandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->handle,
                           &device->subhandle,
                           NV20_SUBDEVICE_0,
                           &nv2080AllocParams);
    if (status != NV_OK)
        goto cleanup_subdevice_desc;
    rmSubDevice->subDeviceHandle = device->subhandle;

    status = trackDescriptor(&rmDevice->subDevices, device->subdeviceInstance, rmSubDevice);
    if (status != NV_OK)
        goto cleanup_subdevice;

    rmDevice->subDeviceCount++;

    portSyncRwLockReleaseWrite(rmDevice->btreeLock);
    portSyncRwLockReleaseRead(session->btreeLock);
    return NV_OK;

cleanup_subdevice:
    pRmApi->Free(pRmApi, session->handle, device->subhandle);
cleanup_subdevice_desc:
    portMemFree(rmSubDevice);
    portSyncRwLockReleaseWrite(rmDevice->btreeLock);
    portSyncRwLockReleaseRead(session->btreeLock);
    return status;
}

static NvBool isDevicePascalPlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_PASCAL;
}

static NvBool isDeviceVoltaPlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_VOLTA;
}

static NvBool isDeviceTuringPlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_TURING;
}

static NvBool isDeviceAmperePlus(const struct gpuDevice *device)
{
    NV_ASSERT(device->rmDevice);
    return device->rmDevice->arch >= GPU_ARCHITECTURE_AMPERE;
}

static UVM_LINK_TYPE rmControlToUvmNvlinkVersion(NvU32 nvlinkVersion)
{
    if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
        return UVM_LINK_TYPE_NONE;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0)
        return UVM_LINK_TYPE_NVLINK_1;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_0 ||
             nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_2)
        return UVM_LINK_TYPE_NVLINK_2;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_0 ||
             nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_1)
        return UVM_LINK_TYPE_NVLINK_3;
    else if (nvlinkVersion == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0)
        return UVM_LINK_TYPE_NVLINK_4;

    NV_ASSERT(0);
    return (NvU32)-1;
}

static NV_STATUS queryFbInfo(struct gpuDevice *device)
{
    NV_STATUS nvStatus = NV_OK;
    NV2080_CTRL_FB_GET_INFO_PARAMS fbInfoParams;
    NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS *fbRegionInfoParams;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NvU32 i;

    struct fbInputParams
    {
        NV2080_CTRL_FB_INFO heapSize;
        NV2080_CTRL_FB_INFO reservedHeapSize;
        NV2080_CTRL_FB_INFO zeroFb;
    } fbParams;

    fbRegionInfoParams = portMemAllocNonPaged(sizeof(*fbRegionInfoParams));
    if (fbRegionInfoParams == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(fbRegionInfoParams, 0, sizeof(*fbRegionInfoParams));
    portMemSet(&fbInfoParams, 0, sizeof(fbInfoParams));
    portMemSet(&fbParams, 0, sizeof(fbParams));

    // Set up the list of parameters we are looking to extract
    fbParams.heapSize.index = NV2080_CTRL_FB_INFO_INDEX_HEAP_SIZE;
    fbParams.reservedHeapSize.index = NV2080_CTRL_FB_INFO_INDEX_VISTA_RESERVED_HEAP_SIZE;
    fbParams.zeroFb.index = NV2080_CTRL_FB_INFO_INDEX_FB_IS_BROKEN;

    fbInfoParams.fbInfoListSize = sizeof(fbParams) / sizeof(fbParams.heapSize);
    fbInfoParams.fbInfoList = NV_PTR_TO_NvP64(&fbParams);

    nvStatus = pRmApi->Control(pRmApi,
                               device->session->handle,
                               device->subhandle,
                               NV2080_CTRL_CMD_FB_GET_INFO,
                               &fbInfoParams,
                               sizeof(fbInfoParams));
    if (nvStatus != NV_OK)
        goto out;

    nvStatus = pRmApi->Control(pRmApi,
                               device->session->handle,
                               device->subhandle,
                               NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO,
                               fbRegionInfoParams,
                               sizeof(*fbRegionInfoParams));
    if (nvStatus != NV_OK)
        goto out;

    device->fbInfo.heapSize = fbParams.heapSize.data;
    device->fbInfo.reservedHeapSize = fbParams.reservedHeapSize.data;
    device->fbInfo.bZeroFb = (NvBool)fbParams.zeroFb.data;

    device->fbInfo.maxAllocatableAddress = 0;

    for (i = 0; i < fbRegionInfoParams->numFBRegions; ++i)
    {
        device->fbInfo.maxAllocatableAddress = NV_MAX(device->fbInfo.maxAllocatableAddress,
                                                      fbRegionInfoParams->fbRegion[i].limit);
    }

out:
    portMemFree(fbRegionInfoParams);
    return nvStatus;
}

// Return the PCIe link cap max speed associated with the given GPU in
// megabytes per second.
static NV_STATUS getPCIELinkRateMBps(struct gpuDevice *device, NvU32 *pcieLinkRate)
{
    // PCI Express Base Specification: https://www.pcisig.com/specifications/pciexpress
    const NvU32 PCIE_1_ENCODING_RATIO_TOTAL = 10;
    const NvU32 PCIE_1_ENCODING_RATIO_EFFECTIVE = 8;
    const NvU32 PCIE_2_ENCODING_RATIO_TOTAL = 10;
    const NvU32 PCIE_2_ENCODING_RATIO_EFFECTIVE = 8;
    const NvU32 PCIE_3_ENCODING_RATIO_TOTAL = 130;
    const NvU32 PCIE_3_ENCODING_RATIO_EFFECTIVE = 128;
    const NvU32 PCIE_4_ENCODING_RATIO_TOTAL = 130;
    const NvU32 PCIE_4_ENCODING_RATIO_EFFECTIVE = 128;
    const NvU32 PCIE_5_ENCODING_RATIO_TOTAL = 130;
    const NvU32 PCIE_5_ENCODING_RATIO_EFFECTIVE = 128;
    const NvU32 PCIE_6_ENCODING_RATIO_TOTAL = 256;
    const NvU32 PCIE_6_ENCODING_RATIO_EFFECTIVE = 242;

    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NV2080_CTRL_BUS_INFO busInfo = {0};
    NV2080_CTRL_BUS_GET_INFO_PARAMS busInfoParams = {0};
    NvU32 linkRate = 0;
    NvU32 lanes;

    busInfo.index = NV2080_CTRL_BUS_INFO_INDEX_PCIE_GPU_LINK_CAPS;
    busInfoParams.busInfoListSize = 1;
    busInfoParams.busInfoList = NV_PTR_TO_NvP64(&busInfo);

    NV_STATUS status = pRmApi->Control(pRmApi,
                                       device->session->handle,
                                       device->subhandle,
                                       NV2080_CTRL_CMD_BUS_GET_INFO,
                                       &busInfoParams,
                                       sizeof(busInfoParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                  __LINE__, nvstatusToString(status));
        return status;
    }

    lanes = DRF_VAL(2080, _CTRL_BUS_INFO, _PCIE_LINK_CAP_MAX_WIDTH, busInfo.data);

    // Bug 2606540: RM reports PCIe transfer rate in GT/s but labels it as Gbps
    switch (DRF_VAL(2080, _CTRL_BUS_INFO, _PCIE_LINK_CAP_MAX_SPEED, busInfo.data))
    {
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_2500MBPS:
            linkRate = ((2500 * lanes * PCIE_1_ENCODING_RATIO_EFFECTIVE)
                        / PCIE_1_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_5000MBPS:
            linkRate = ((5000 * lanes * PCIE_2_ENCODING_RATIO_EFFECTIVE)
                        / PCIE_2_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_8000MBPS:
            linkRate = ((8000 * lanes * PCIE_3_ENCODING_RATIO_EFFECTIVE)
                        / PCIE_3_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_16000MBPS:
            linkRate = ((16000 * lanes * PCIE_4_ENCODING_RATIO_EFFECTIVE)
                        / PCIE_4_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_32000MBPS:
            linkRate = ((32000 * lanes * PCIE_5_ENCODING_RATIO_EFFECTIVE)
                        / PCIE_5_ENCODING_RATIO_TOTAL) / 8;
            break;
        case NV2080_CTRL_BUS_INFO_PCIE_LINK_CAP_MAX_SPEED_64000MBPS:
            linkRate = ((64000 * lanes * PCIE_6_ENCODING_RATIO_EFFECTIVE)
                        / PCIE_6_ENCODING_RATIO_TOTAL) / 8;
            break;
        default:
            status = NV_ERR_INVALID_STATE;
            NV_PRINTF(LEVEL_ERROR, "Unknown PCIe speed\n");
    }

    *pcieLinkRate = linkRate;

    return status;
}
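
//
// Worked example for the conversion above: a Gen3 x16 link reports 8000 MT/s
// per lane, so with 128b/130b encoding
//     linkRate = ((8000 * 16 * 128) / 130) / 8 = 15753 MBps,
// i.e. roughly 15.75 GB/s of effective one-directional bandwidth.
//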

NV_STATUS nvGpuOpsDeviceCreate(struct gpuSession *session,
                               const gpuInfo *pGpuInfo,
                               const NvProcessorUuid *gpuUuid,
                               struct gpuDevice **outDevice,
                               NvBool bCreateSmcPartition)
{
    NV_STATUS status;
    struct gpuDevice *device = NULL;
    NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}};
    NV2080_CTRL_BUS_GET_INFO_V2_PARAMS *busInfoParams;
    NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus;
    NvU32 nvlinkVersion;
    NvU32 sysmemLink;
    NvU32 linkBandwidthMBps;
    NvU32 sysmemConnType;
    NvBool atomicSupported;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    OBJGPU *pGpu;

    device = portMemAllocNonPaged(sizeof(*device));
    if (device == NULL)
        return NV_ERR_INSUFFICIENT_RESOURCES;
    portMemSet(device, 0, sizeof(*device));
    device->session = session;

    portMemCopy(&gpuIdInfoParams.gpuUuid, NV_UUID_LEN, gpuUuid->uuid, NV_UUID_LEN);
    gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             session->handle,
                             NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
                             &gpuIdInfoParams,
                             sizeof(gpuIdInfoParams));
    if (status != NV_OK)
        goto cleanup_device_obj;

    device->deviceInstance = gpuIdInfoParams.deviceInstance;
    device->subdeviceInstance = gpuIdInfoParams.subdeviceInstance;
    device->gpuId = gpuIdInfoParams.gpuId;

    status = nvGpuOpsRmDeviceCreate(device);
    if (status != NV_OK)
        goto cleanup_device_obj;

    status = nvGpuOpsRmSubDeviceCreate(device);
    if (status != NV_OK)
        goto cleanup_rm_device;

    if (bCreateSmcPartition)
    {
        status = nvGpuOpsRmSmcPartitionCreate(device, pGpuInfo);
        if (status != NV_OK)
            goto cleanup_rm_subdevice;
    }

    // Create the work submission info mapping:
    //  * SMC is disabled, we create for the device.
    //  * SMC is enabled, we create only for SMC partitions.
    if (isDeviceVoltaPlus(device) && (!pGpuInfo->smcEnabled || bCreateSmcPartition))
    {
        status = gpuDeviceMapUsermodeRegion(device);
        if (status != NV_OK)
            goto cleanup_smc_partition;
    }

    status = gpuDeviceRmSubDeviceInitEcc(device);
    if (status != NV_OK)
        goto cleanup_subdevice_usermode;

    status = queryFbInfo(device);
    if (status != NV_OK)
        goto cleanup_ecc;

    device->isTccMode = NV_FALSE;

    // Non-TCC mode on Windows implies WDDM mode.
    device->isWddmMode = !device->isTccMode;

    status = findDeviceClasses(session->handle,
                               device->handle,
                               device->subhandle,
                               &device->hostClass,
                               &device->ceClass,
                               &device->computeClass,
                               &device->faultBufferClass,
                               &device->accessCounterBufferClass,
                               &device->sec2Class);
    if (status != NV_OK)
        goto cleanup_ecc;

    busInfoParams = portMemAllocNonPaged(sizeof(*busInfoParams));
    if (busInfoParams == NULL)
    {
        status = NV_ERR_INSUFFICIENT_RESOURCES;
        goto cleanup_ecc;
    }
    portMemSet(busInfoParams, 0, sizeof(*busInfoParams));
    busInfoParams->busInfoListSize = 1;
    busInfoParams->busInfoList[0].index = NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE;
    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_BUS_GET_INFO_V2,
                             busInfoParams,
                             sizeof(*busInfoParams));
    if (status != NV_OK)
    {
        portMemFree(busInfoParams);
        goto cleanup_ecc;
    }

    sysmemConnType = busInfoParams->busInfoList[0].data;
    portMemFree(busInfoParams);

    sysmemLink = UVM_LINK_TYPE_NONE;
    switch (sysmemConnType)
    {
        case NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_NVLINK:
        {
            status = allocNvlinkStatusForSubdevice(device, &nvlinkStatus);
            if (status != NV_OK)
                goto cleanup_ecc;

            nvlinkVersion = getNvlinkConnectionToNpu(nvlinkStatus,
                                                     &atomicSupported,
                                                     &linkBandwidthMBps);

            sysmemLink = rmControlToUvmNvlinkVersion(nvlinkVersion);

            portMemFree(nvlinkStatus);
            nvlinkStatus = NULL;
            break;
        }
        case NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_PCIE:
        {
            sysmemLink = UVM_LINK_TYPE_PCIE;
            status = getPCIELinkRateMBps(device, &linkBandwidthMBps);
            if (status != NV_OK)
                goto cleanup_ecc;
            break;
        }
        default:
        {
            NV_PRINTF(LEVEL_ERROR, "Unsupported sysmem connection type: %d\n",
                      sysmemConnType);
            NV_ASSERT(0);
            break;
        }
    }

    NV_PRINTF(LEVEL_INFO, "sysmem link type: %d bw: %u\n", sysmemLink, linkBandwidthMBps);

    NV_ASSERT(sysmemLink != UVM_LINK_TYPE_NONE);
    device->sysmemLink = sysmemLink;
    device->sysmemLinkRateMBps = linkBandwidthMBps;

    status = allocNvlinkStatusForSubdevice(device, &nvlinkStatus);
    if (status != NV_OK)
        goto cleanup_ecc;
    nvlinkVersion = getNvlinkConnectionToSwitch(nvlinkStatus,
                                                &linkBandwidthMBps);

    if (rmControlToUvmNvlinkVersion(nvlinkVersion) != UVM_LINK_TYPE_NONE)
    {
        NV_ASSERT(rmControlToUvmNvlinkVersion(nvlinkVersion) != UVM_LINK_TYPE_NVLINK_1);

        // If the GPU is ever connected to the CPU via a switch, sysmemLink
        // and sysmemLinkRateMBps need to be updated accordingly.
        NV_ASSERT(sysmemConnType != NV2080_CTRL_BUS_INFO_INDEX_SYSMEM_CONNECTION_TYPE_NVLINK);

        device->connectedToSwitch = NV_TRUE;
    }

    portMemFree(nvlinkStatus);

    mapInit(&device->kern2PhysDescrMap, portMemAllocatorGetGlobalNonPaged());

    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        goto cleanup_ecc;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        goto cleanup_ecc;

    if (IS_VIRTUAL_WITH_HEAVY_SRIOV(pGpu))
    {
        device->pPagingChannelRpcMutex = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged());
        if (device->pPagingChannelRpcMutex == NULL)
        {
            status = NV_ERR_NO_MEMORY;
            goto cleanup_ecc;
        }
    }

    *outDevice = device;
    return NV_OK;

cleanup_ecc:
    gpuDeviceRmSubDeviceDeinitEcc(device);
cleanup_subdevice_usermode:
    gpuDeviceDestroyUsermodeRegion(device);
cleanup_smc_partition:
    nvGpuOpsRmSmcPartitionDestroy(device);
cleanup_rm_subdevice:
    nvGpuOpsDeviceDestroy(device);
    device = NULL;
cleanup_rm_device:
    if (device)
        nvGpuOpsRmDeviceDestroy(device);
cleanup_device_obj:
    portMemFree(device);
    return status;
}

NV_STATUS nvGpuOpsDeviceDestroy(struct gpuDevice *device)
{
    deviceDesc *rmDevice = device->rmDevice;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    rmSubDevice->refCount--;

    if (rmSubDevice->refCount == 0)
    {
        gpuDeviceDestroyUsermodeRegion(device);

        gpuDeviceRmSubDeviceDeinitEcc(device);

        nvGpuOpsRmSmcPartitionDestroy(device);

        portSyncRwLockAcquireWrite(rmDevice->btreeLock);
        rmDevice->subDeviceCount--;
        deleteDescriptor(&rmDevice->subDevices, device->subdeviceInstance, (void**)&rmSubDevice);
        pRmApi->Free(pRmApi, device->session->handle, rmSubDevice->subDeviceHandle);
        portMemFree(rmSubDevice);
        portSyncRwLockReleaseWrite(rmDevice->btreeLock);

        nvGpuOpsRmDeviceDestroy(device);
    }

    mapDestroy(&device->kern2PhysDescrMap);

    if (device->pPagingChannelRpcMutex != NULL)
        portSyncMutexDestroy(device->pPagingChannelRpcMutex);

    portMemFree(device);
    return NV_OK;
}

NV_STATUS nvGpuOpsOwnPageFaultIntr(struct gpuDevice *device,
                                   NvBool bOwnInterrupts)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    NV2080_CTRL_MC_CHANGE_REPLAYABLE_FAULT_OWNERSHIP_PARAMS changeParams = {0};
    changeParams.bOwnedByRm = !bOwnInterrupts;
    return pRmApi->Control(pRmApi,
                           device->session->handle,
                           device->subhandle,
                           NV2080_CTRL_CMD_MC_CHANGE_REPLAYABLE_FAULT_OWNERSHIP,
                           &changeParams,
                           sizeof(changeParams));
}

static NV_STATUS getAddressSpaceInfo(struct gpuAddressSpace *vaSpace,
                                     OBJGPU *pGpu,
                                     UvmGpuAddressSpaceInfo *vaSpaceInfo)
{
    NV_STATUS status;
    NV0080_CTRL_DMA_ADV_SCHED_GET_VA_CAPS_PARAMS params = {0};
    OBJVASPACE *pVAS = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    struct gpuDevice *device = vaSpace->device;
    struct gpuSession *session = device->session;
    subDeviceDesc *rmSubDevice = device->rmSubDevice;

    params.hVASpace = vaSpace->handle;
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             device->handle,
                             NV0080_CTRL_CMD_DMA_ADV_SCHED_GET_VA_CAPS,
                             &params,
                             sizeof(params));
    if (status != NV_OK)
        return status;

    vaSpaceInfo->bigPageSize = params.bigPageSize;

    // TODO: Acquired because resserv expects RMAPI lock. Necessary?
    {
        RsClient *pClient;
        status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
        if (status != NV_OK)
            return status;

        status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
        if (status == NV_OK)
        {
            status = vaspaceGetByHandleOrDeviceDefault(pClient, device->handle, vaSpace->handle, &pVAS);
        }
        rmapiLockRelease();
        if (status != NV_OK)
            return status;
    }

    vaSpaceInfo->atsEnabled = vaspaceIsAtsEnabled(pVAS);

    if (isDeviceTuringPlus(vaSpace->device))
    {
        //
        // On Turing+ use the VIRTUAL_FUNCTION registers so this works in both
        // hosts and guests.
        //
        void *bar0Mapping = gpuBar0BaseAddress(pGpu);
        vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)bar0Mapping + GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_TIME_0));
        vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)bar0Mapping + GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_TIME_1));
    }
    else if (isDeviceVoltaPlus(vaSpace->device))
    {
        NV_ASSERT(rmSubDevice->clientRegionMapping);

        //
        // On Volta prefer USERMODE mappings for better passthrough
        // performance on some hypervisors (see CL23003453 for more details)
        //
        vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_TIME_0);
        vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_TIME_1);
    }
    else
    {
        void *bar0Mapping = gpuBar0BaseAddress(pGpu);
        vaSpaceInfo->time0Offset = (NvU32 *)((NvU8*)bar0Mapping + NV_PTIMER_TIME_0);
        vaSpaceInfo->time1Offset = (NvU32 *)((NvU8*)bar0Mapping + NV_PTIMER_TIME_1);
    }

    if (IS_MIG_IN_USE(pGpu))
    {
        KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = rmSubDevice->smcPartition.info;
        MIG_RESOURCE_ALLOCATION *pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;

        vaSpaceInfo->maxSubctxCount = pResourceAllocation->veidCount;
        vaSpaceInfo->smcGpcCount = pResourceAllocation->gpcCount;
    }
    else
    {
        NV2080_CTRL_FIFO_GET_INFO_PARAMS *fifoGetInfoParams;

        //
        // NV2080_CTRL_FIFO_GET_INFO_PARAMS takes over 2KB, so we use a heap
        // allocation
        //
        fifoGetInfoParams = portMemAllocNonPaged(sizeof(*fifoGetInfoParams));
        if (fifoGetInfoParams == NULL)
            return NV_ERR_NO_MEMORY;

        fifoGetInfoParams->fifoInfoTblSize = 1;
        fifoGetInfoParams->fifoInfoTbl[0].index = NV2080_CTRL_FIFO_INFO_INDEX_MAX_SUBCONTEXT_PER_GROUP;

        status = pRmApi->Control(pRmApi,
                                 session->handle,
                                 rmSubDevice->subDeviceHandle,
                                 NV2080_CTRL_CMD_FIFO_GET_INFO,
                                 fifoGetInfoParams,
                                 sizeof(*fifoGetInfoParams));

        vaSpaceInfo->maxSubctxCount = fifoGetInfoParams->fifoInfoTbl[0].data;

        portMemFree(fifoGetInfoParams);

        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}
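
//
// Illustrative only (not part of the original flow): a consumer of
// UvmGpuAddressSpaceInfo can assemble a coherent 64-bit timestamp from the
// two 32-bit timer registers exposed above. The low word (time0Offset) can
// wrap between the two reads, so the classic hi-lo-hi loop applies. The
// helper name below is hypothetical; it assumes the offsets point at live
// MMIO.
//
//     static NvU64 uvmExampleReadGpuTime(const UvmGpuAddressSpaceInfo *info)
//     {
//         NvU32 hi, lo;
//         do
//         {
//             hi = *(volatile NvU32 *)info->time1Offset;
//             lo = *(volatile NvU32 *)info->time0Offset;
//         } while (hi != *(volatile NvU32 *)info->time1Offset);
//         return ((NvU64)hi << 32) | lo;
//     }
//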
// This function will create a new address space object of type FERMI_VASPACE_A.
NV_STATUS nvGpuOpsAddressSpaceCreate(struct gpuDevice *device,
                                     NvU64 vaBase,
                                     NvU64 vaSize,
                                     struct gpuAddressSpace **vaSpace,
                                     UvmGpuAddressSpaceInfo *vaSpaceInfo)
{
    NV_STATUS status;
    struct gpuAddressSpace *gpuVaSpace = NULL;
    OBJGPU *pGpu = NULL;
    NV_VASPACE_ALLOCATION_PARAMETERS vaParams = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();

    gpuVaSpace = portMemAllocNonPaged(sizeof(*gpuVaSpace));
    if (gpuVaSpace == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(gpuVaSpace, 0, sizeof(*gpuVaSpace));
    gpuVaSpace->vaBase = vaBase;
    gpuVaSpace->vaSize = vaSize;
    gpuVaSpace->handle = NV01_NULL_OBJECT;
    gpuVaSpace->allocationsLock = portSyncRwLockCreate(pAlloc);
    gpuVaSpace->cpuMappingsLock = portSyncRwLockCreate(pAlloc);
    gpuVaSpace->physAllocationsLock = portSyncRwLockCreate(pAlloc);

    *vaSpace = NULL;
    portMemSet(vaSpaceInfo, 0, sizeof(*vaSpaceInfo));

    // Create a new vaSpace object
    vaParams.index = NV_VASPACE_ALLOCATION_INDEX_GPU_NEW;
    vaParams.vaBase = gpuVaSpace->vaBase;
    vaParams.vaSize = gpuVaSpace->vaSize;
    vaParams.flags = gpuVaSpace->vaSize ?
                     NV_VASPACE_ALLOCATION_FLAGS_SHARED_MANAGEMENT :
                     NV_VASPACE_ALLOCATION_FLAGS_NONE;

    // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        goto cleanup_vaspace;
    status = CliSetGpuContext(device->session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
    {
        goto cleanup_vaspace;
    }

    status = pRmApi->Alloc(pRmApi,
                           device->session->handle,
                           device->handle,
                           &gpuVaSpace->handle, FERMI_VASPACE_A,
                           &vaParams);
    if (status != NV_OK)
    {
        goto cleanup_struct;
    }

    // If base and size were not provided by the caller, RM has filled them in now.
    gpuVaSpace->vaBase = vaParams.vaBase;
    gpuVaSpace->vaSize = vaParams.vaSize;
    gpuVaSpace->device = device;

    status = getAddressSpaceInfo(gpuVaSpace, pGpu, vaSpaceInfo);
    if (status != NV_OK)
    {
        goto cleanup_vaspace;
    }

    *vaSpace = gpuVaSpace;
    return status;

cleanup_vaspace:
    pRmApi->Free(pRmApi, device->session->handle, gpuVaSpace->handle);

cleanup_struct:
    portSyncRwLockDestroy(gpuVaSpace->allocationsLock);
    portSyncRwLockDestroy(gpuVaSpace->cpuMappingsLock);
    portSyncRwLockDestroy(gpuVaSpace->physAllocationsLock);
    portMemFree(gpuVaSpace);
    return status;
}
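
//
// Illustrative call pattern (hypothetical caller, not part of this file's
// flow): passing vaBase == 0 and vaSize == 0 requests a fully RM-managed VA
// space (flags NONE), and the actual base/size come back through vaParams as
// noted above.
//
//     struct gpuAddressSpace *vas;
//     UvmGpuAddressSpaceInfo info;
//     if (nvGpuOpsAddressSpaceCreate(device, 0, 0, &vas, &info) == NV_OK)
//     {
//         // ... use info.bigPageSize, info.time0Offset, etc. ...
//         nvGpuOpsAddressSpaceDestroy(vas);
//     }
//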
NV_STATUS nvGpuOpsDupAddressSpace(struct gpuDevice *device,
                                  NvHandle hUserClient,
                                  NvHandle hUserVASpace,
                                  struct gpuAddressSpace **vaSpace,
                                  UvmGpuAddressSpaceInfo *vaSpaceInfo)
{
    NV_STATUS status = NV_OK;
    struct gpuAddressSpace *gpuVaSpace = NULL;
    struct gpuSession *session = device->session;
    OBJVASPACE *pVAS = NULL;
    OBJGPU *pGpu = NULL;
    RsResourceRef *pVaSpaceRef;
    RsResourceRef *pDeviceRef;
    Device *pDevice = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    PORT_MEM_ALLOCATOR *pAlloc = portMemAllocatorGetGlobalNonPaged();

    *vaSpace = NULL;
    portMemSet(vaSpaceInfo, 0, sizeof(*vaSpaceInfo));

    // TODO - Move this check to RMDupObject later.
    // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
    // Find the device associated with the hUserVASpace and verify that the UUID belongs to it.
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = serverutilGetResourceRef(hUserClient, hUserVASpace, &pVaSpaceRef);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    if (!dynamicCast(pVaSpaceRef->pResource, VaSpaceApi))
        return NV_ERR_INVALID_OBJECT;

    // The parent must be valid and a device if this is a VA space handle
    // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = serverutilGetResourceRef(hUserClient, pVaSpaceRef->pParentRef->hResource, &pDeviceRef);
    rmapiLockRelease();
    NV_ASSERT(status == NV_OK);

    pDevice = dynamicCast(pDeviceRef->pResource, Device);
    NV_ASSERT(pDevice != NULL);

    if (pDevice->deviceInst != device->deviceInstance)
        return NV_ERR_OTHER_DEVICE_FOUND;

    gpuVaSpace = portMemAllocNonPaged(sizeof(*gpuVaSpace));
    if (gpuVaSpace == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(gpuVaSpace, 0, sizeof(*gpuVaSpace));

    gpuVaSpace->device = device;
    gpuVaSpace->allocationsLock = portSyncRwLockCreate(pAlloc);
    gpuVaSpace->cpuMappingsLock = portSyncRwLockCreate(pAlloc);
    gpuVaSpace->physAllocationsLock = portSyncRwLockCreate(pAlloc);

    // Dup the vaspace
    gpuVaSpace->handle = NV01_NULL_OBJECT;
    status = pRmApi->DupObject(pRmApi,
                               session->handle,
                               device->handle,
                               &gpuVaSpace->handle,
                               hUserClient,
                               hUserVASpace,
                               NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
    if (status != NV_OK)
        goto cleanup_vaspace;

    // TODO: Acquired because these functions expect RMAPI lock. Necessary?
    {
        RsClient *pClient;
        status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
        if (status != NV_OK)
            goto cleanup_dup_vaspace;

        status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
        if (status == NV_OK)
        {
            status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
            if (status == NV_OK)
            {
                status = vaspaceGetByHandleOrDeviceDefault(pClient, device->handle, gpuVaSpace->handle, &pVAS);
            }
        }
        rmapiLockRelease();
        if (status != NV_OK)
            goto cleanup_dup_vaspace;
    }

    if (!vaspaceIsExternallyOwned(pVAS))
    {
        status = NV_ERR_INVALID_FLAGS;
        goto cleanup_dup_vaspace;
    }

    status = getAddressSpaceInfo(gpuVaSpace, pGpu, vaSpaceInfo);
    if (status != NV_OK)
        goto cleanup_dup_vaspace;

    *vaSpace = gpuVaSpace;

    return NV_OK;

cleanup_dup_vaspace:
    pRmApi->Free(pRmApi, session->handle, gpuVaSpace->handle);
cleanup_vaspace:
    portSyncRwLockDestroy(gpuVaSpace->allocationsLock);
    portSyncRwLockDestroy(gpuVaSpace->cpuMappingsLock);
    portSyncRwLockDestroy(gpuVaSpace->physAllocationsLock);
    portMemFree(gpuVaSpace);
    return status;
}
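
//
// Illustrative usage of the helper below (mirroring the call sites earlier in
// this file): the caller owns the returned buffer and must portMemFree() it.
//
//     NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *st;
//     if (allocNvlinkStatusForSubdevice(device, &st) == NV_OK)
//     {
//         // ... inspect st->enabledLinkMask, st->linkInfo[i] ...
//         portMemFree(st);
//     }
//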
// Get the NVLink connection status for the given device. On success, the
// caller is responsible for freeing the memory.
static NV_STATUS allocNvlinkStatusForSubdevice(struct gpuDevice *device,
                                               NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS **nvlinkStatusOut)
{
    NV_STATUS status;
    NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    *nvlinkStatusOut = NULL;

    nvlinkStatus = portMemAllocNonPaged(sizeof(*nvlinkStatus));
    if (nvlinkStatus == NULL)
        return NV_ERR_NO_MEMORY;

    portMemSet(nvlinkStatus, 0, sizeof(*nvlinkStatus));
    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS,
                             nvlinkStatus,
                             sizeof(*nvlinkStatus));
    if (status == NV_ERR_NOT_SUPPORTED)
    {
        portMemSet(nvlinkStatus, 0, sizeof(*nvlinkStatus));
    }
    else if (status != NV_OK)
    {
        portMemFree(nvlinkStatus);
        NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                  __LINE__, nvstatusToString(status));
        return status;
    }

    *nvlinkStatusOut = nvlinkStatus;

    return NV_OK;
}

// If the given NvLink connection has a GPU device as an endpoint, return the
// version of the NvLink connection with that GPU, and the maximum
// unidirectional bandwidth in megabytes per second. Otherwise, return
// NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
static NvU32 getNvlinkConnectionToGpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
                                      OBJGPU *pGpu,
                                      NvU32 *linkBandwidthMBps)
{
    NvU32 i;

    NvU32 version = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;
    NvU32 domain = gpuGetDomain(pGpu);
    NvU16 bus = gpuGetBus(pGpu);
    NvU16 device = gpuGetDevice(pGpu);
    NvU32 bwMBps = 0;

    for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
    {
        if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
            continue;

        if (!nvlinkStatus->linkInfo[i].connected)
            continue;

        // Skip loopback/loopout connections
        if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
            continue;

        if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
        {
            if ((nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceIdFlags &
                 NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_PCI) == 0)
            {
                NV_PRINTF(LEVEL_ERROR, "No PCI information for GPU.\n");
                continue;
            }

            if ((domain == nvlinkStatus->linkInfo[i].remoteDeviceInfo.domain) &&
                (bus == nvlinkStatus->linkInfo[i].remoteDeviceInfo.bus) &&
                (device == nvlinkStatus->linkInfo[i].remoteDeviceInfo.device) &&
                (pGpu->idInfo.PCIDeviceID == nvlinkStatus->linkInfo[i].remoteDeviceInfo.pciDeviceId))
            {
                NvU32 capsTbl = nvlinkStatus->linkInfo[i].capsTbl;

                NV_ASSERT(NV2080_CTRL_NVLINK_GET_CAP(((NvU8 *)&capsTbl), NV2080_CTRL_NVLINK_CAPS_P2P_ATOMICS));

                if (bwMBps == 0)
                    version = nvlinkStatus->linkInfo[i].nvlinkVersion;

                bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
                NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
            }
        }
    }

    *linkBandwidthMBps = bwMBps;
    if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
        NV_ASSERT(*linkBandwidthMBps == 0);

    return version;
}
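
//
// Worked example (illustrative numbers): with enabledLinkMask == 0x6, only
// links 1 and 2 are inspected by the loop above. If both are connected to the
// same remote GPU at a per-link rate of 25000 MB/s, version is taken from
// link 1 and *linkBandwidthMBps comes out as 50000: bandwidth aggregates
// across links, while the version must be homogeneous (enforced by the
// NV_ASSERT).
//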
// If the given NvLink connection has an NPU device as an endpoint, return the
// version of the NvLink connection with that NPU, and the maximum
// unidirectional bandwidth in megabytes per second. Otherwise, return
// NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
static NvU32 getNvlinkConnectionToNpu(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
                                      NvBool *atomicSupported,
                                      NvU32 *linkBandwidthMBps)
{
    NvU32 i;
    NvU32 bwMBps = 0;
    NvU32 version = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;

    *atomicSupported = NV_FALSE;

    for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
    {
        if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
            continue;

        if (!nvlinkStatus->linkInfo[i].connected)
            continue;

        // Skip loopback/loopout connections
        if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
            continue;

        if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_NPU)
        {
            NvU32 capsTbl = nvlinkStatus->linkInfo[i].capsTbl;
            NvBool atomicCap = !!NV2080_CTRL_NVLINK_GET_CAP(((NvU8 *)&capsTbl), NV2080_CTRL_NVLINK_CAPS_SYSMEM_ATOMICS);

            if (bwMBps == 0)
            {
                *atomicSupported = atomicCap;
                version = nvlinkStatus->linkInfo[i].nvlinkVersion;
            }
            bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
            NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
            NV_ASSERT(*atomicSupported == atomicCap);
        }
    }

    *linkBandwidthMBps = bwMBps;
    if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
        NV_ASSERT(*linkBandwidthMBps == 0);

    return version;
}
// If the given NvLink connection has a switch as an endpoint, return the
// version of the NvLink connection with that switch, and the maximum
// unidirectional bandwidth in megabytes per second. Otherwise, return
// NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID.
static NvU32 getNvlinkConnectionToSwitch(const NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus,
                                         NvU32 *linkBandwidthMBps)
{
    NvU32 i;
    NvU32 bwMBps = 0;
    NvU32 version = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;

    for (i = 0; i < NV2080_CTRL_NVLINK_MAX_LINKS; ++i)
    {
        if (((1 << i) & nvlinkStatus->enabledLinkMask) == 0)
            continue;

        if (!nvlinkStatus->linkInfo[i].connected)
            continue;

        // Skip loopback/loopout connections
        if (nvlinkStatus->linkInfo[i].loopProperty != NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE)
            continue;

        if (nvlinkStatus->linkInfo[i].remoteDeviceInfo.deviceType == NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_SWITCH)
        {
            if (bwMBps == 0)
                version = nvlinkStatus->linkInfo[i].nvlinkVersion;

            bwMBps += nvlinkStatus->linkInfo[i].nvlinkLineRateMbps;
            NV_ASSERT(version == nvlinkStatus->linkInfo[i].nvlinkVersion);
        }
    }

    *linkBandwidthMBps = bwMBps;
    if (version == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
        NV_ASSERT(*linkBandwidthMBps == 0);

    return version;
}

// Compute whether the non-peer GPUs with the given NVLink connections can
// communicate through P9 NPUs
static NV_STATUS gpusHaveNpuNvlink(NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1,
                                   NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2,
                                   NvU32 *nvlinkVersion,
                                   NvU32 *linkBandwidthMBps)
{
    NvU32 nvlinkVersion1, nvlinkVersion2;
    NvU32 tmpLinkBandwidthMBps;
    NvBool atomicSupported1, atomicSupported2;

    *nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;

    nvlinkVersion1 = getNvlinkConnectionToNpu(nvlinkStatus1,
                                              &atomicSupported1,
                                              &tmpLinkBandwidthMBps);
    nvlinkVersion2 = getNvlinkConnectionToNpu(nvlinkStatus2,
                                              &atomicSupported2,
                                              &tmpLinkBandwidthMBps);

    if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID ||
        nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
        return NV_OK;

    // Non-peer GPU communication over NPU is only supported on NVLink 2.0 or
    // greater
    if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
        nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0)
    {
        // NVLink1 devices cannot be mixed with other versions. NVLink3
        // supports mixing NVLink2 and NVLink3 devices
        NV_ASSERT(nvlinkVersion1 == nvlinkVersion2);
        return NV_OK;
    }

    NV_ASSERT(atomicSupported1);
    NV_ASSERT(atomicSupported2);

    // We do not explore the whole connectivity graph. We assume that NPUs
    // connected to NVLink2 (and greater) can forward memory requests so that
    // if GPU A is connected to NPU M and GPU B is connected to NPU N, A can
    // access B.
    *nvlinkVersion = NV_MIN(nvlinkVersion1, nvlinkVersion2);
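
    //
    // Worked example (illustrative): if GPU A reports NVLink 2.0 to its NPU
    // and GPU B reports NVLink 3.0, NV_MIN() yields
    // NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_0, i.e. the pair is
    // described by the weaker of the two CPU connections.
    //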
    // Link bandwidth is not provided because the intermediate link rate could
    // vary a lot with system topologies and current load, which would make
    // this bandwidth obsolete.
    *linkBandwidthMBps = 0;

    return NV_OK;
}

static NV_STATUS rmSystemP2PCapsControl(struct gpuDevice *device1,
                                        struct gpuDevice *device2,
                                        NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams));
    p2pCapsParams->gpuIds[0] = device1->gpuId;
    p2pCapsParams->gpuIds[1] = device2->gpuId;
    p2pCapsParams->gpuCount = 2;

    NvHandle handle = device1->session->handle;
    NV_STATUS status = pRmApi->Control(pRmApi,
                                       handle,
                                       handle,
                                       NV0000_CTRL_CMD_SYSTEM_GET_P2P_CAPS_V2,
                                       p2pCapsParams,
                                       sizeof(*p2pCapsParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__,
                  __LINE__, nvstatusToString(status));
    }

    return status;
}

// Get R/W/A access capabilities and the link type between the two given GPUs
static NV_STATUS getSystemP2PCaps(struct gpuDevice *device1,
                                  struct gpuDevice *device2,
                                  struct systemP2PCaps *p2pCaps)
{
    NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams = NULL;
    NV_STATUS status = NV_OK;

    p2pCapsParams = portMemAllocNonPaged(sizeof(*p2pCapsParams));
    if (p2pCapsParams == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    status = rmSystemP2PCapsControl(device1, device2, p2pCapsParams);
    if (status != NV_OK)
        goto done;

    portMemSet(p2pCaps, 0, sizeof(*p2pCaps));
    p2pCaps->peerIds[0] = p2pCapsParams->busPeerIds[0 * 2 + 1];
    p2pCaps->peerIds[1] = p2pCapsParams->busPeerIds[1 * 2 + 0];
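    //
    // Note on the indexing above (an assumption based on how these entries
    // are consumed): busPeerIds appears to be a gpuCount x gpuCount matrix
    // flattened row-major, where entry [i][j] holds the peer ID that
    // gpuIds[i] uses to reach gpuIds[j]. With gpuCount == 2:
    //   busPeerIds[0 * 2 + 1] - peer ID device1 uses to access device2
    //   busPeerIds[1 * 2 + 0] - peer ID device2 uses to access device1
    //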
    p2pCaps->nvlinkSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_NVLINK_SUPPORTED, p2pCapsParams->p2pCaps);
    p2pCaps->atomicSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_ATOMICS_SUPPORTED, p2pCapsParams->p2pCaps);
    p2pCaps->indirectAccessSupported = !!REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_INDIRECT_NVLINK_SUPPORTED,
                                                 p2pCapsParams->p2pCaps);

    // TODO: Bug 1768805: Check both reads and writes since RM seems to be
    //       currently incorrectly reporting just the P2P write cap on some
    //       systems that cannot support P2P at all. See the bug for more
    //       details.
    if (REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_READS_SUPPORTED, p2pCapsParams->p2pCaps) &&
        REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_WRITES_SUPPORTED, p2pCapsParams->p2pCaps))
    {
        NV_ASSERT(!p2pCaps->indirectAccessSupported);

        p2pCaps->accessSupported = NV_TRUE;
    }

    if (p2pCaps->nvlinkSupported || p2pCaps->indirectAccessSupported)
    {
        // Exactly one CE is expected to be recommended for transfers between
        // NvLink peers
        NV_ASSERT(nvPopCount32(p2pCapsParams->p2pOptimalWriteCEs) == 1);

        // Query the write mask only; UVM has no use for the read mask
        p2pCaps->optimalNvlinkWriteCEs[0] = BIT_IDX_32(p2pCapsParams->p2pOptimalWriteCEs);

        // Query the P2P capabilities of device2->device1, which may be
        // different from those of device1->device2
        status = rmSystemP2PCapsControl(device2, device1, p2pCapsParams);
        if (status != NV_OK)
            goto done;

        NV_ASSERT(nvPopCount32(p2pCapsParams->p2pOptimalWriteCEs) == 1);

        p2pCaps->optimalNvlinkWriteCEs[1] = BIT_IDX_32(p2pCapsParams->p2pOptimalWriteCEs);
    }

done:
    portMemFree(p2pCapsParams);
    return status;
}

// Return the NVLink P2P capabilities of the peer GPUs with the given devices
static NV_STATUS getNvlinkP2PCaps(struct gpuDevice *device1,
                                  struct gpuDevice *device2,
                                  NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1,
                                  NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2,
                                  NvU32 *nvlinkVersion,
                                  NvU32 *linkBandwidthMBps)
{
    NvU32 nvlinkVersion1, nvlinkVersion2;
    NvU32 linkBandwidthMBps1, linkBandwidthMBps2;

    *nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID;

    if (device1->connectedToSwitch && device2->connectedToSwitch)
    {
        nvlinkVersion1 = getNvlinkConnectionToSwitch(nvlinkStatus1,
                                                     &linkBandwidthMBps1);
        nvlinkVersion2 = getNvlinkConnectionToSwitch(nvlinkStatus2,
                                                     &linkBandwidthMBps2);
    }
    else
    {
        OBJGPU *pGpu1, *pGpu2;

        pGpu1 = gpumgrGetGpuFromId(device1->gpuId);
        if (!pGpu1)
            return NV_ERR_OBJECT_NOT_FOUND;

        pGpu2 = gpumgrGetGpuFromId(device2->gpuId);
        if (!pGpu2)
            return NV_ERR_OBJECT_NOT_FOUND;

        nvlinkVersion1 = getNvlinkConnectionToGpu(nvlinkStatus1,
                                                  pGpu2,
                                                  &linkBandwidthMBps1);
        nvlinkVersion2 = getNvlinkConnectionToGpu(nvlinkStatus2,
                                                  pGpu1,
                                                  &linkBandwidthMBps2);
    }

    if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID ||
        nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_INVALID)
    {
        *linkBandwidthMBps = 0;
        return NV_OK;
    }

    // NVLink1 devices cannot be mixed with other versions. NVLink3 supports
    // mixing NVLink2 and NVLink3 devices. NVLink4 devices cannot be mixed with
    // prior NVLink versions.
    if (nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
        nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0 ||
        nvlinkVersion1 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0 ||
        nvlinkVersion2 == NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0)
    {
        NV_ASSERT(nvlinkVersion1 == nvlinkVersion2);
        NV_ASSERT(linkBandwidthMBps1 == linkBandwidthMBps2);
    }

    *nvlinkVersion = NV_MIN(nvlinkVersion1, nvlinkVersion2);
    *linkBandwidthMBps = NV_MIN(linkBandwidthMBps1, linkBandwidthMBps2);

    return NV_OK;
}

NV_STATUS nvGpuOpsGetP2PCaps(struct gpuDevice *device1,
                             struct gpuDevice *device2,
                             getP2PCapsParams *p2pCapsParams)
{
    NV_STATUS status = NV_OK;
    NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus1 = NULL;
    NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *nvlinkStatus2 = NULL;
    struct systemP2PCaps p2pCaps;

    if (!device1 || !device2)
        return NV_ERR_INVALID_ARGUMENT;

    if (device1->session != device2->session)
        return NV_ERR_INVALID_ARGUMENT;

    if (!p2pCapsParams)
        return NV_ERR_INVALID_ARGUMENT;

    status = allocNvlinkStatusForSubdevice(device1, &nvlinkStatus1);
    if (status != NV_OK)
        goto cleanup;

    status = allocNvlinkStatusForSubdevice(device2, &nvlinkStatus2);
    if (status != NV_OK)
        goto cleanup;

    portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams));
    p2pCapsParams->peerIds[0] = (NvU32)-1;
    p2pCapsParams->peerIds[1] = (NvU32)-1;
    p2pCapsParams->p2pLink = UVM_LINK_TYPE_NONE;
    p2pCapsParams->indirectAccess = NV_FALSE;

    status = getSystemP2PCaps(device1, device2, &p2pCaps);
    if (status != NV_OK)
        goto cleanup;

    if (p2pCaps.indirectAccessSupported)
    {
        NvU32 nvlinkVersion;
        NvU32 linkBandwidthMBps;
        NvU32 p2pLink;

        status = gpusHaveNpuNvlink(nvlinkStatus1,
                                   nvlinkStatus2,
                                   &nvlinkVersion,
                                   &linkBandwidthMBps);
        if (status != NV_OK)
            goto cleanup;

        p2pLink = rmControlToUvmNvlinkVersion(nvlinkVersion);

        NV_ASSERT(p2pLink >= UVM_LINK_TYPE_NVLINK_2);
        NV_ASSERT(linkBandwidthMBps == 0);

        p2pCapsParams->indirectAccess = NV_TRUE;
        p2pCapsParams->p2pLink = p2pLink;
        p2pCapsParams->optimalNvlinkWriteCEs[0] = p2pCaps.optimalNvlinkWriteCEs[0];
        p2pCapsParams->optimalNvlinkWriteCEs[1] = p2pCaps.optimalNvlinkWriteCEs[1];
        p2pCapsParams->totalLinkLineRateMBps = linkBandwidthMBps;
    }
    else if (p2pCaps.accessSupported)
    {
        p2pCapsParams->peerIds[0] = p2pCaps.peerIds[0];
        p2pCapsParams->peerIds[1] = p2pCaps.peerIds[1];

        if (p2pCaps.nvlinkSupported)
        {
            NvU32 nvlinkVersion;
            NvU32 linkBandwidthMBps;

            NV_ASSERT(p2pCaps.atomicSupported);

            status = getNvlinkP2PCaps(device1,
                                      device2,
                                      nvlinkStatus1,
                                      nvlinkStatus2,
                                      &nvlinkVersion,
                                      &linkBandwidthMBps);
            if (status != NV_OK)
                goto cleanup;

            p2pCapsParams->p2pLink = rmControlToUvmNvlinkVersion(nvlinkVersion);
            p2pCapsParams->optimalNvlinkWriteCEs[0] = p2pCaps.optimalNvlinkWriteCEs[0];
            p2pCapsParams->optimalNvlinkWriteCEs[1] = p2pCaps.optimalNvlinkWriteCEs[1];

            NV_ASSERT(p2pCapsParams->p2pLink != UVM_LINK_TYPE_NONE);
            NV_ASSERT(linkBandwidthMBps != 0);

            p2pCapsParams->totalLinkLineRateMBps = linkBandwidthMBps;
        }
        else
        {
            NvU32 linkBandwidthMBps1, linkBandwidthMBps2;

            status = getPCIELinkRateMBps(device1, &linkBandwidthMBps1);
            if (status != NV_OK)
                goto cleanup;

            status = getPCIELinkRateMBps(device2, &linkBandwidthMBps2);
            if (status != NV_OK)
                goto cleanup;

            p2pCapsParams->p2pLink = UVM_LINK_TYPE_PCIE;
            p2pCapsParams->totalLinkLineRateMBps = NV_MIN(linkBandwidthMBps1, linkBandwidthMBps2);
        }
    }

cleanup:
    portMemFree(nvlinkStatus1);
    portMemFree(nvlinkStatus2);

    return status;
}

static NV_STATUS nvGpuOpsGetExternalAllocP2pInfo(struct gpuSession *session,
                                                 NvU32 memOwnerGpuId,
                                                 NvU32 gpuId,
                                                 NvBool *isPeerSupported,
                                                 NvU32 *peerId)
{
    NV_STATUS status = NV_OK;
    NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS *p2pCapsParams = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    NV_ASSERT(gpuId != memOwnerGpuId);

    p2pCapsParams = portMemAllocNonPaged(sizeof(*p2pCapsParams));
    if (p2pCapsParams == NULL)
    {
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    portMemSet(p2pCapsParams, 0, sizeof(*p2pCapsParams));
    p2pCapsParams->gpuIds[0] = gpuId;
    p2pCapsParams->gpuIds[1] = memOwnerGpuId;
    p2pCapsParams->gpuCount = 2;

    status = pRmApi->Control(pRmApi,
                             session->handle,
                             session->handle,
                             NV0000_CTRL_CMD_SYSTEM_GET_P2P_CAPS_V2,
                             p2pCapsParams,
                             sizeof(*p2pCapsParams));
    if (status != NV_OK)
        goto done;

    *isPeerSupported =
        (REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_WRITES_SUPPORTED, p2pCapsParams->p2pCaps) &&
         REF_VAL(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_READS_SUPPORTED, p2pCapsParams->p2pCaps));

    *peerId = p2pCapsParams->busPeerIds[0 * 2 + 1];

done:
    portMemFree(p2pCapsParams);
    return status;
}

static GMMU_APERTURE nvGpuOpsGetExternalAllocAperture(PMEMORY_DESCRIPTOR pMemDesc,
                                                      NvBool isIndirectPeerSupported,
                                                      NvBool isPeerSupported)
{
    // Don't support both direct and indirect peers
    NV_ASSERT(!(isIndirectPeerSupported && isPeerSupported));

    // Get the aperture
    if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
    {
        if (isIndirectPeerSupported)
            return GMMU_APERTURE_SYS_COH;

        if (isPeerSupported)
            return GMMU_APERTURE_PEER;

        return GMMU_APERTURE_VIDEO;
    }
    else if (
             (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC) ||
             (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_V2))
    {
        return GMMU_APERTURE_PEER;
    }
    else
    {
        return GMMU_APERTURE_SYS_COH;
    }
}
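
//
// Summary of the mapping computed above (restating the branches for quick
// reference, no new behavior):
//   ADDR_FBMEM, indirect peer   -> GMMU_APERTURE_SYS_COH
//   ADDR_FBMEM, direct peer     -> GMMU_APERTURE_PEER
//   ADDR_FBMEM, local           -> GMMU_APERTURE_VIDEO
//   ADDR_FABRIC_MC / _V2        -> GMMU_APERTURE_PEER
//   anything else (e.g. sysmem) -> GMMU_APERTURE_SYS_COH
//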
static NvBool nvGpuOpsGetExternalAllocVolatility(PMEMORY_DESCRIPTOR pMemDesc,
                                                 GMMU_APERTURE aperture,
                                                 NvBool isIndirectPeerSupported,
                                                 UvmRmGpuCachingType cachingType)
{
    if (cachingType == UvmRmGpuCachingTypeDefault)
    {
        if (aperture == GMMU_APERTURE_PEER || isIndirectPeerSupported)
            return (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED) ? NV_TRUE : NV_FALSE;
        else
            return (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED) ? NV_TRUE : NV_FALSE;
    }
    else if (cachingType == UvmRmGpuCachingTypeForceUncached)
    {
        return NV_TRUE;
    }
    else
    {
        return NV_FALSE;
    }
}

static NV_STATUS nvGpuOpsGetExternalAllocMappingAttribute(UvmRmGpuMappingType mappingType,
                                                          PMEMORY_DESCRIPTOR pMemDesc,
                                                          NvBool *readOnly,
                                                          NvBool *atomic)
{
    *readOnly = NV_FALSE;
    *atomic = NV_FALSE;

    if (memdescGetFlag(pMemDesc, MEMDESC_FLAGS_DEVICE_READ_ONLY))
    {
        if (mappingType != UvmRmGpuMappingTypeDefault &&
            mappingType != UvmRmGpuMappingTypeReadOnly)
            return NV_ERR_INVALID_ACCESS_TYPE;

        *readOnly = NV_TRUE;
        *atomic = NV_FALSE;
    }
    else
    {
        *readOnly = (mappingType == UvmRmGpuMappingTypeReadOnly);
        *atomic = (mappingType == UvmRmGpuMappingTypeDefault ||
                   mappingType == UvmRmGpuMappingTypeReadWriteAtomic);
    }

    return NV_OK;
}

static NV_STATUS nvGpuOpsGetPteKind(OBJGPU *pMappingGpu,
                                    MemoryManager *pMemoryManager,
                                    PMEMORY_DESCRIPTOR pMemDesc,
                                    Memory *pMemory,
                                    gpuExternalMappingInfo *pGpuExternalMappingInfo,
                                    NvU32 *newKind)
{
    NV_STATUS status = NV_OK;
    FB_ALLOC_PAGE_FORMAT fbAllocPageFormat = {0};
    NvU32 ctagId;

    if (pGpuExternalMappingInfo->compressionType == UvmRmGpuCompressionTypeEnabledNoPlc)
    {
        if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, *newKind))
        {
            status = memmgrChooseKind_HAL(pMappingGpu,
                                          pMemoryManager,
                                          &fbAllocPageFormat,
                                          NVOS32_ATTR_COMPR_DISABLE_PLC_ANY,
                                          newKind);
        }
        else
        {
            status = NV_ERR_INVALID_ARGUMENT;
        }

        if (status != NV_OK)
            return status;
    }

    if (pGpuExternalMappingInfo->formatType != UvmRmGpuFormatTypeDefault)
    {
        NV_ASSERT(pGpuExternalMappingInfo->elementBits != UvmRmGpuFormatElementBitsDefault);

        fbAllocPageFormat.attr = pMemory->Attr;
        fbAllocPageFormat.attr2 = pMemory->Attr2;
        fbAllocPageFormat.flags = pMemory->Flags;
        fbAllocPageFormat.type = pMemory->Type;

        switch (pGpuExternalMappingInfo->formatType)
        {
            case UvmRmGpuFormatTypeBlockLinear:
                fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _FORMAT, _BLOCK_LINEAR, fbAllocPageFormat.attr);
                break;
            default:
                break;
        }

        switch (pGpuExternalMappingInfo->elementBits)
        {
            case UvmRmGpuFormatElementBits8:
                fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _8, fbAllocPageFormat.attr);
                break;
            case UvmRmGpuFormatElementBits16:
                fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _16, fbAllocPageFormat.attr);
                break;
            // CUDA does not support 24-bit width
            case UvmRmGpuFormatElementBits32:
                fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _32, fbAllocPageFormat.attr);
                break;
            case UvmRmGpuFormatElementBits64:
                fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _64, fbAllocPageFormat.attr);
                break;
            case UvmRmGpuFormatElementBits128:
                fbAllocPageFormat.attr = FLD_SET_DRF(OS32, _ATTR, _DEPTH, _128, fbAllocPageFormat.attr);
                break;
            default:
                break;
        }

        status = memmgrChooseKind_HAL(pMappingGpu, pMemoryManager, &fbAllocPageFormat,
                                      DRF_VAL(OS32, _ATTR, _COMPR, fbAllocPageFormat.attr),
                                      newKind);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Invalid kind type (%x)\n", *newKind);
            return status;
        }
        //
        // Check that the memory descriptor already has allocated comptags
        // if the new mapping enables compression. Downgrade the kind if no
        // comptags are present.
        //
        ctagId = FB_HWRESID_CTAGID_VAL_FERMI(memdescGetHwResId(pMemDesc));
        if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, *newKind) && !ctagId)
            *newKind = memmgrGetUncompressedKind_HAL(pMappingGpu, pMemoryManager, *newKind, NV_FALSE);

        if (*newKind == NV_MMU_PTE_KIND_INVALID)
            return NV_ERR_INVALID_ARGUMENT;
    }
    else
    {
        NV_ASSERT((pGpuExternalMappingInfo->elementBits == UvmRmGpuFormatElementBitsDefault) ||
                  (pGpuExternalMappingInfo->elementBits == UvmRmGpuFormatElementBits8));
    }

    return NV_OK;
}

static
NV_STATUS
nvGpuOpsMemGetPageSize
(
    OBJGPU *pGpu,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64 *pPageSize
)
{
    NvU64 pageSize;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NV_STATUS status;

    pageSize = memdescGetPageSize(pMemDesc, AT_GPU);
    if (pageSize == 0)
    {
        status = memmgrSetMemDescPageSize_HAL(pGpu,
                                              pMemoryManager,
                                              pMemDesc,
                                              AT_GPU,
                                              RM_ATTR_PAGE_SIZE_DEFAULT);
        if (status != NV_OK)
            return status;

        pageSize = memdescGetPageSize(pMemDesc, AT_GPU);
        NV_ASSERT(pageSize != 0);
    }

    *pPageSize = pageSize;

    return NV_OK;
}

static
NV_STATUS
nvGpuOpsBuildExternalAllocPtes
(
    OBJVASPACE *pVAS,
    OBJGPU *pMappingGpu,
    MEMORY_DESCRIPTOR *pMemDesc,
    Memory *pMemory,
    NvU64 offset,
    NvU64 size,
    NvBool isIndirectPeerSupported,
    NvBool isPeerSupported,
    NvU32 peerId,
    gpuExternalMappingInfo *pGpuExternalMappingInfo
)
{
    NV_STATUS status = NV_OK;
    OBJGVASPACE *pGVAS = NULL;
    const GMMU_FMT *pFmt = NULL;
    const GMMU_FMT_PTE *pPteFmt = NULL;
    const MMU_FMT_LEVEL *pLevelFmt = NULL;
    GMMU_APERTURE aperture;
    COMPR_INFO comprInfo;
    GMMU_ENTRY_VALUE pte = {{0}};

    NvU64 fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR;
    NvU32 kind;
    NvU64 pageSize;
    NvU32 skipPteCount;
    NvBool vol, atomic, readOnly;
    NvBool encrypted, privileged;
    NvU64 iter, physAddr, mappingSize, pteCount;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pMappingGpu);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pMappingGpu);
    NvU64 allocSize;
    NvBool isCompressedKind;
    NvU64 *physicalAddresses = NULL;
    NvU32 newKind, oldKind;
    NvBool kindChanged = NV_FALSE;
    NvU64 gpaOffset;
    NvBool *isPLCable = NULL;
    NvU64 *guestPhysicalAddress = NULL;
    NvU64 mappingPageSize = pGpuExternalMappingInfo->mappingPageSize;

    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));

    status = nvGpuOpsMemGetPageSize(pMappingGpu,
                                    pMemDesc,
                                    &pageSize);
    if (status != NV_OK)
        return status;

    //
    // Default mappingPageSize to the allocation's page size if passed as 0.
    // If mappingPageSize is non-zero, it must divide pageSize evenly; the
    // mapping page size cannot be larger than the allocation page size.
    //
    if (mappingPageSize == 0)
    {
        mappingPageSize = pageSize;
    }
    else if ((mappingPageSize > pageSize) ||
             (pageSize % mappingPageSize != 0))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }
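
    //
    // Worked example (illustrative): a 2MB-page allocation may be mapped with
    // 64KB PTEs, since 64KB <= 2MB and 2MB % 64KB == 0. The reverse, mapping
    // a 64KB-page allocation with 2MB PTEs, fails the checks above with
    // NV_ERR_INVALID_ARGUMENT.
    //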
    //
    // memdescGetSize returns the requested size of the allocation, but the
    // actual allocation size could be larger due to alignment requirements,
    // so make sure the correct size is used. Note that the alignment can be
    // greater than the pageSize.
    //
    allocSize = RM_ALIGN_UP(pMemDesc->ActualSize, pageSize);

    if (offset >= allocSize)
        return NV_ERR_INVALID_BASE;

    if ((offset + size) > allocSize)
        return NV_ERR_INVALID_LIMIT;

    if ((size & (mappingPageSize - 1)) != 0)
        return NV_ERR_INVALID_ARGUMENT;

    if ((offset & (mappingPageSize - 1)) != 0)
        return NV_ERR_INVALID_ARGUMENT;

    pGVAS = dynamicCast(pVAS, OBJGVASPACE);

    // Get the GMMU format
    pFmt = gvaspaceGetGmmuFmt(pGVAS, pMappingGpu);
    pPteFmt = (GMMU_FMT_PTE*)pFmt->pPte;
    pLevelFmt = mmuFmtFindLevelWithPageShift(pFmt->pRoot, BIT_IDX_64(mappingPageSize));

    oldKind = newKind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);
    if (pMemory)
    {
        //
        // The physical memory layout can be specified after allocation using
        // UvmMapExternalAllocation, so the kind attribute needs to be computed
        // again
        //
        status = nvGpuOpsGetPteKind(pMappingGpu, pMemoryManager, pMemDesc, pMemory,
                                    pGpuExternalMappingInfo, &newKind);

        if (status != NV_OK)
            return status;

        if (oldKind != newKind)
        {
            memdescSetPteKindForGpu(pMemDesc, pMappingGpu, newKind);
            kindChanged = NV_TRUE;
        }
    }

    // Get the CompTag range and Kind.
    status = memmgrGetKindComprForGpu_HAL(pMemoryManager, pMemDesc, pMappingGpu, 0, &kind, &comprInfo);
    if (status != NV_OK)
        return status;

    if (kindChanged)
        memdescSetPteKindForGpu(pMemDesc, pMappingGpu, oldKind);

    aperture = nvGpuOpsGetExternalAllocAperture(pMemDesc, isIndirectPeerSupported, isPeerSupported);

    vol = nvGpuOpsGetExternalAllocVolatility(pMemDesc, aperture, isIndirectPeerSupported,
                                             pGpuExternalMappingInfo->cachingType);

    status = nvGpuOpsGetExternalAllocMappingAttribute(pGpuExternalMappingInfo->mappingType,
                                                      pMemDesc,
                                                      &readOnly,
                                                      &atomic);
    if (status != NV_OK)
        return status;

    encrypted = memdescGetFlag(pMemDesc, MEMDESC_FLAGS_ENCRYPTED);

    privileged = memdescGetFlag(pMemDesc, MEMDESC_FLAGS_GPU_PRIVILEGED);

    mappingSize = size ? size : allocSize;

    skipPteCount = pLevelFmt->entrySize / sizeof(NvU64);

    isCompressedKind = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind);

    //
    // Specifying a mapping page size for compressed
    // allocations is not yet supported.
    //
    if (isCompressedKind && (pGpuExternalMappingInfo->mappingPageSize != 0) &&
        (pGpuExternalMappingInfo->mappingPageSize != pageSize))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    pteCount = NV_MIN((pGpuExternalMappingInfo->pteBufferSize / pLevelFmt->entrySize),
                      (mappingSize / mappingPageSize));
    if (!pteCount)
        return NV_ERR_BUFFER_TOO_SMALL;
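
    //
    // Worked example (illustrative): with an 8-byte PTE, skipPteCount == 1
    // and each PTE occupies one NvU64 slot of pteBuffer; a hypothetical
    // 16-byte entry would give skipPteCount == 2, so written PTEs start at
    // every other NvU64. Likewise, a 4KB pteBuffer with 8-byte entries holds
    // 512 PTEs; mapping 16MB with 64KB pages needs 256 PTEs, so pteCount is
    // NV_MIN(512, 256) == 256 and numRemainingPtes (set below) is 0.
    //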
    if (pFmt->version == GMMU_FMT_VERSION_3)
    {
        NvU32 ptePcfSw = 0;
        NvU32 ptePcfHw = 0;

        nvFieldSetBool(&pPteFmt->fldValid, NV_TRUE, pte.v8);
        gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
        nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);

        ptePcfSw |= vol ? (1 << SW_MMU_PCF_UNCACHED_IDX) : 0;
        ptePcfSw |= readOnly ? (1 << SW_MMU_PCF_RO_IDX) : 0;
        ptePcfSw |= !atomic ? (1 << SW_MMU_PCF_NOATOMIC_IDX) : 0;
        ptePcfSw |= !privileged ? (1 << SW_MMU_PCF_REGULAR_IDX) : 0;

        if ((memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC))
        {
            ptePcfSw |= (1 << SW_MMU_PCF_ACE_IDX);
        }

        NV_CHECK_OR_RETURN(LEVEL_ERROR,
                           (kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                           NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pPteFmt->fldPtePcf, ptePcfHw, pte.v8);
    }
    else
    {
        if (nvFieldIsValid32(&pPteFmt->fldValid.desc))
            nvFieldSetBool(&pPteFmt->fldValid, NV_TRUE, pte.v8);

        if (nvFieldIsValid32(&pPteFmt->fldVolatile.desc))
            nvFieldSetBool(&pPteFmt->fldVolatile, vol, pte.v8);

        if (nvFieldIsValid32(&pPteFmt->fldPrivilege.desc))
            nvFieldSetBool(&pPteFmt->fldPrivilege, privileged, pte.v8);

        if (nvFieldIsValid32(&pPteFmt->fldEncrypted.desc))
            nvFieldSetBool(&pPteFmt->fldEncrypted, encrypted, pte.v8);

        if (nvFieldIsValid32(&pPteFmt->fldReadOnly.desc))
            nvFieldSetBool(&pPteFmt->fldReadOnly, readOnly, pte.v8);

        if (nvFieldIsValid32(&pPteFmt->fldWriteDisable.desc))
            nvFieldSetBool(&pPteFmt->fldWriteDisable, readOnly, pte.v8);

        if (nvFieldIsValid32(&pPteFmt->fldReadDisable.desc))
            nvFieldSetBool(&pPteFmt->fldReadDisable, NV_FALSE, pte.v8);

        if (nvFieldIsValid32(&pPteFmt->fldAtomicDisable.desc))
            nvFieldSetBool(&pPteFmt->fldAtomicDisable, !atomic, pte.v8);

        gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);

        if (!isCompressedKind)
        {
            nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);
            nvFieldSet32(&pPteFmt->fldCompTagLine, 0, pte.v8);
            if (nvFieldIsValid32(&pPteFmt->fldCompTagSubIndex))
                nvFieldSet32(&pPteFmt->fldCompTagSubIndex, 0, pte.v8);
        }
    }

    if (aperture == GMMU_APERTURE_PEER)
    {
        FlaMemory* pFlaMemory = dynamicCast(pMemory, FlaMemory);
        nvFieldSet32(&pPteFmt->fldPeerIndex, peerId, pte.v8);

        if (
            (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_MC) ||
            (memdescGetAddressSpace(pMemDesc) == ADDR_FABRIC_V2) || pFlaMemory)
        {
            //
            // Any fabric memory descriptors are pre-encoded with the fabric
            // base address; use NVLINK_INVALID_FABRIC_ADDR to avoid encoding
            // twice.
            //
            fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR;
        }
        else
        {
            KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMemDesc->pGpu);
            if (pKernelNvlink == NULL)
            {
                fabricBaseAddress = NVLINK_INVALID_FABRIC_ADDR;
            }
            else
            {
                fabricBaseAddress = knvlinkGetUniqueFabricBaseAddress(pMemDesc->pGpu, pKernelNvlink);
            }
        }
    }

    //
    // Both memdescGetPhysAddr() and kgmmuEncodePhysAddr() have pretty high overhead.
    // To avoid it, allocate an array for the physical addresses and use the
    // flavors of the APIs that work on multiple addresses at a time.
    //
    // Notably the pteBuffer array could be re-used for that, but it gets a bit
    // tricky if skipPteCount is greater than 1 so just keep it simple.
    //
    physicalAddresses = portMemAllocNonPaged((NvU32)pteCount * sizeof(*physicalAddresses));
    if (physicalAddresses == NULL)
        return NV_ERR_NO_MEMORY;
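
    //
    // Note on the encoding step below (a summary, stated here as an
    // assumption rather than new behavior): when fabricBaseAddress is valid,
    // kgmmuEncodePhysAddrs() folds it into each physical address so the PTE
    // carries the peer-visible fabric address; NVLINK_INVALID_FABRIC_ADDR
    // leaves the addresses untouched, which is why the pre-encoded fabric
    // memdescs above pass the invalid sentinel.
    //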
    //
    // Ask for physical addresses for the GPU being mapped as it may not be the
    // same as the GPU owning the memdesc. This matters for sysmem as accessing
    // it requires IOMMU mappings to be set up and these are different for each
    // GPU. The IOMMU mappings are currently added by nvGpuOpsDupMemory().
    //
    memdescGetPhysAddrsForGpu(pMemDesc, pMappingGpu, AT_GPU, offset, mappingPageSize,
                              pteCount, physicalAddresses);
    kgmmuEncodePhysAddrs(pKernelGmmu, aperture, physicalAddresses, fabricBaseAddress, pteCount);

    //
    // Get information on whether the given physical addresses need a PLCable
    // kind
    //
    if (IS_VIRTUAL_WITH_SRIOV(pMappingGpu) &&
        gpuIsWarBug200577889SriovHeavyEnabled(pMappingGpu) &&
        isCompressedKind &&
        !memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_DISALLOW_PLC, comprInfo.kind))
    {
        guestPhysicalAddress = portMemAllocNonPaged((NvU32)pteCount * sizeof(*guestPhysicalAddress));
        if (guestPhysicalAddress == NULL)
        {
            status = NV_ERR_NO_MEMORY;
            goto done;
        }

        portMemSet(guestPhysicalAddress, 0, ((NvU32)pteCount * sizeof(*guestPhysicalAddress)));

        gpaOffset = offset;
        for (iter = 0; iter < pteCount; iter++)
        {
            guestPhysicalAddress[iter] = gpaOffset;
            gpaOffset += mappingPageSize;
        }

        isPLCable = portMemAllocNonPaged((NvU32)pteCount * sizeof(*isPLCable));
        if (isPLCable == NULL)
        {
            status = NV_ERR_NO_MEMORY;
            goto done;
        }

        portMemSet(isPLCable, 0, ((NvU32)pteCount * sizeof(*isPLCable)));

        NV_RM_RPC_GET_PLCABLE_ADDRESS_KIND(pMappingGpu, guestPhysicalAddress, mappingPageSize, (NvU32)pteCount,
                                           isPLCable, status);
        if (status != NV_OK)
            goto done;
    }

    for (iter = 0; iter < pteCount; iter++)
    {
        physAddr = physicalAddresses[iter];

        gmmuFieldSetAddress(gmmuFmtPtePhysAddrFld(pPteFmt, aperture),
                            physAddr,
                            pte.v8);

        if (isCompressedKind)
        {
            // We have to reset the pte.v8 fields in case of partially
            // compressed allocations; otherwise, non-compressed PTEs would
            // inherit bits from compressed PTEs.
            if (pFmt->version <= GMMU_FMT_VERSION_2)
            {
                NvBool bIsWarApplied = NV_FALSE;
                NvU32 savedKind = comprInfo.kind;
                MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pMappingGpu);
                KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pMappingGpu);
                const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
                    kmemsysGetStaticConfig(pMappingGpu, pKernelMemorySystem);

                nvFieldSet32(&pPteFmt->fldKind, 0, pte.v8);
                nvFieldSet32(&pPteFmt->fldCompTagLine, 0, pte.v8);
                if (nvFieldIsValid32(&pPteFmt->fldCompTagSubIndex))
                    nvFieldSet32(&pPteFmt->fldCompTagSubIndex, 0, pte.v8);

                if (pMemorySystemConfig->bUseRawModeComptaglineAllocation &&
                    pMemorySystemConfig->bDisablePlcForCertainOffsetsBug3046774 &&
                    !memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_DISALLOW_PLC, comprInfo.kind))
                {
                    NvBool bEnablePlc = NV_TRUE;

                    if (IS_VIRTUAL_WITH_SRIOV(pMappingGpu) &&
                        gpuIsWarBug200577889SriovHeavyEnabled(pMappingGpu))
                    {
                        bEnablePlc = isPLCable[iter];
                    }
                    else
                    {
                        bEnablePlc = kmemsysIsPagePLCable_HAL(pMappingGpu, pKernelMemorySystem,
                                                              offset, mappingPageSize);
                    }

                    if (!bEnablePlc)
                    {
                        bIsWarApplied = NV_TRUE;
                        memmgrGetDisablePlcKind_HAL(pMemoryManager, &comprInfo.kind);
                    }
                }

                kgmmuFieldSetKindCompTags(GPU_GET_KERNEL_GMMU(pMappingGpu), pFmt, pLevelFmt, &comprInfo, physAddr,
                                          offset, mmuFmtVirtAddrToEntryIndex(pLevelFmt, offset), pte.v8);
                //
                // Restore the kind to PLC if it was changed, since the kind is
                // associated with the entire surface while the WAR applies to
                // individual pages in the surface.
                //
                if (bIsWarApplied)
                    comprInfo.kind = savedKind;
            }
        }

        portMemCopy(&pGpuExternalMappingInfo->pteBuffer[iter * skipPteCount], pLevelFmt->entrySize, pte.v8, pLevelFmt->entrySize);

        offset += mappingPageSize;
    }

    pGpuExternalMappingInfo->numWrittenPtes = pteCount;
    pGpuExternalMappingInfo->numRemainingPtes = (mappingSize / mappingPageSize) - pteCount;
    pGpuExternalMappingInfo->pteSize = pLevelFmt->entrySize;

done:
    portMemFree(physicalAddresses);

    portMemFree(guestPhysicalAddress);

    portMemFree(isPLCable);

    return status;
}

NV_STATUS nvGpuOpsGetExternalAllocPtes(struct gpuAddressSpace *vaSpace,
                                       NvHandle hMemory,
                                       NvU64 offset,
                                       NvU64 size,
                                       gpuExternalMappingInfo *pGpuExternalMappingInfo)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    Memory *pMemory = NULL;
    PMEMORY_DESCRIPTOR pMemDesc = NULL;
    OBJGPU *pMappingGpu = NULL;
    NvU32 peerId = 0;
    NvBool isSliSupported = NV_FALSE;
    NvBool isPeerSupported = NV_FALSE;
    NvBool isIndirectPeerSupported = NV_FALSE;
    OBJVASPACE *pVAS = NULL;
    FlaMemory *pFlaMemory = NULL;
    OBJGPU *pSrcGpu = NULL;
    OBJGPU *pPeerGpu = NULL;
    RsClient *pClient;
    MEMORY_DESCRIPTOR *pAdjustedMemDesc = NULL;
    FABRIC_VASPACE *pFabricVAS = NULL;

    if (!pGpuExternalMappingInfo || !hMemory || !vaSpace)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      vaSpace->device->session->handle,
                                      &pClient,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if (status != NV_OK)
        goto done;

    status = nvGpuOpsGetMemoryByHandle(vaSpace->device->session->handle,
                                       hMemory,
                                       &pMemory);
    if (status != NV_OK)
        goto done;

    // RM client allocations can't have multiple subDevice memdescs.
    pMemDesc = pMemory->pMemDesc;
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));
    // Do not support mapping on anything other than sysmem/vidmem/fabric!
    if ((memdescGetAddressSpace(pMemDesc) != ADDR_SYSMEM) &&
        (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) &&
        (memdescGetAddressSpace(pMemDesc) != ADDR_FABRIC_MC) &&
        (memdescGetAddressSpace(pMemDesc) != ADDR_FABRIC_V2))
    {
        status = NV_ERR_NOT_SUPPORTED;
        goto done;
    }

    status = CliSetGpuContext(vaSpace->device->session->handle,
                              vaSpace->device->handle,
                              &pMappingGpu,
                              NULL);
    if (status != NV_OK)
        goto done;

    pAdjustedMemDesc = pMemDesc;
    pFabricVAS = dynamicCast(pMappingGpu->pFabricVAS, FABRIC_VASPACE);
    if (pFabricVAS != NULL)
    {
        status = fabricvaspaceGetGpaMemdesc(pFabricVAS, pMemDesc, pMappingGpu, &pAdjustedMemDesc);
        if (status != NV_OK)
            goto done;
    }

    // Check if P2P is supported
    if (
        (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_MC) ||
        (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_V2))
    {
        KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pMappingGpu);

        isPeerSupported = NV_TRUE;
        pPeerGpu = pAdjustedMemDesc->pGpu;
        peerId = BUS_INVALID_PEER;

        if (!memIsGpuMapAllowed(pMemory, pMappingGpu))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Mapping GPU is not attached to the given memory object\n");
            status = NV_ERR_INVALID_STATE;
            goto freeGpaMemdesc;
        }

        if (pPeerGpu != NULL)
        {
            if ((pKernelNvlink != NULL) &&
                knvlinkIsNvlinkP2pSupported(pMappingGpu, pKernelNvlink, pPeerGpu))
            {
                peerId = kbusGetPeerId_HAL(pMappingGpu, GPU_GET_KERNEL_BUS(pMappingGpu), pPeerGpu);
            }
        }
        else
        {
            peerId = kbusGetNvSwitchPeerId_HAL(pMappingGpu,
                                               GPU_GET_KERNEL_BUS(pMappingGpu));
        }

        if (peerId == BUS_INVALID_PEER)
        {
            status = NV_ERR_INVALID_STATE;
            goto freeGpaMemdesc;
        }
    }
    else if (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FBMEM &&
             (pAdjustedMemDesc->pGpu->gpuId != pMappingGpu->gpuId ||
              dynamicCast(pMemory, FlaMemory)))
    {
        if (gpumgrCheckIndirectPeer(pAdjustedMemDesc->pGpu, pMappingGpu))
        {
            isIndirectPeerSupported = NV_TRUE;
        }
        else
        {
            pFlaMemory = dynamicCast(pMemory, FlaMemory);
            if (pFlaMemory != NULL)
            {
                pSrcGpu = gpumgrGetGpu(pFlaMemory->peerGpuInst);
                if (!pSrcGpu)
                {
                    status = NV_ERR_INVALID_ARGUMENT;
                    goto freeGpaMemdesc;
                }
            }

            status = nvGpuOpsGetExternalAllocP2pInfo(vaSpace->device->session,
                                                     (pFlaMemory) ? (pSrcGpu->gpuId) : (pAdjustedMemDesc->pGpu->gpuId),
                                                     pMappingGpu->gpuId,
                                                     &isPeerSupported,
                                                     &peerId);
            if (status != NV_OK)
                goto freeGpaMemdesc;
        }

        //
        // If the GPUs are in the same SLI group, don't do peer mappings even
        // if the GPUs are different. In an SLI config, if a caller tries to
        // map memory on a GPU other than the one associated with the memdesc,
        // always return a local VIDMEM mapping, because RM shares a memdesc
        // among such GPUs for client allocations.
        // Note: This check could be avoided if we knew that pMemDesc->pGpu is
        // always the SLI master, i.e. the same as the pGpu returned by
        // CliSetGpuContext.
        //
        if (!pFlaMemory && pAdjustedMemDesc->pGpu->deviceInstance == pMappingGpu->deviceInstance)
        {
            isPeerSupported = NV_FALSE;
            isSliSupported = NV_TRUE;
        }
3544 if ((isPeerSupported || isIndirectPeerSupported) &&
3545 (IsSLIEnabled(pMappingGpu) || IsSLIEnabled(pAdjustedMemDesc->pGpu)))
3546 {
3547 status = NV_ERR_NOT_SUPPORTED;
3548 goto freeGpaMemdesc;
3549 }
3550
3551 NV_ASSERT(!(isPeerSupported && isSliSupported));
3552
3553 // If a caller is trying to map VIDMEM across GPUs that have no P2P support and are not in the same SLI group, error out.
3554 if (!isPeerSupported && !isIndirectPeerSupported && !isSliSupported)
3555 {
3556 status = NV_ERR_NOT_SUPPORTED;
3557 goto freeGpaMemdesc;
3558 }
3559 }
3560
3561 status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pAdjustedMemDesc, pMemory, offset, size,
3562 isIndirectPeerSupported, isPeerSupported, peerId,
3563 pGpuExternalMappingInfo);
3564
3565 freeGpaMemdesc:
3566 if (pAdjustedMemDesc != pMemDesc)
3567 fabricvaspacePutGpaMemdesc(pFabricVAS, pAdjustedMemDesc);
3568
3569 done:
3570 _nvGpuOpsLocksRelease(&acquiredLocks);
3571 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
3572 return status;
3573 }
3574
3575 void nvGpuOpsAddressSpaceDestroy(struct gpuAddressSpace *vaSpace)
3576 {
3577 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3578
3579 NV_ASSERT(vaSpace->dummyGpuAlloc.refCount == 0);
3580
3581 // Free all the virtual allocations and their backing physical handles
3582 if (vaSpace->allocations)
3583 {
3584 portSyncRwLockAcquireWrite(vaSpace->allocationsLock);
3585 destroyAllGpuMemDescriptors(vaSpace->device->session->handle,
3586 vaSpace->allocations);
3587 portSyncRwLockReleaseWrite(vaSpace->allocationsLock);
3588 }
3589
3590 // Free all the physical allocations
3591 if (vaSpace->physAllocations)
3592 {
3593 portSyncRwLockAcquireWrite(vaSpace->physAllocationsLock);
3594 destroyAllGpuMemDescriptors(vaSpace->device->session->handle,
3595 vaSpace->physAllocations);
3596 portSyncRwLockReleaseWrite(vaSpace->physAllocationsLock);
3597 }
3598
3599 // Destroy CPU mappings
3600 if (vaSpace->cpuMappings)
3601 {
3602 portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
3603 btreeDestroyData(vaSpace->cpuMappings);
3604 portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
3605 }
3606
3607 if (vaSpace->handle)
3608 pRmApi->Free(pRmApi, vaSpace->device->session->handle, vaSpace->handle);
3609
3610 portSyncRwLockDestroy(vaSpace->allocationsLock);
3611 portSyncRwLockDestroy(vaSpace->cpuMappingsLock);
3612 portSyncRwLockDestroy(vaSpace->physAllocationsLock);
3613
3614 portMemFree(vaSpace);
3615 }
3616
3617 static NV_STATUS nvGpuOpsAllocPhysical(struct gpuDevice *device,
3618 NvBool isSystemMemory,
3619 NvLength length,
3620 NvU64 *paOffset,
3621 gpuAllocInfo *allocInfo)
3622 {
3623 NV_MEMORY_ALLOCATION_PARAMS memAllocParams = {0};
3624 NV_STATUS status = NV_OK;
3625 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3626
3627 NvHandle physHandle = 0;
3628
3629 NV_ASSERT(allocInfo);
3630 NV_ASSERT(device);
3631 NV_ASSERT(paOffset);
3632
3633 // Allocate the physical memory in either sysmem or FB.
3634 memAllocParams.owner = HEAP_OWNER_RM_KERNEL_CLIENT;
3635
3636 // Physical allocations don't expect vaSpace handles
3637 memAllocParams.hVASpace = 0;
3638
3639 // Reset previous offset
3640 memAllocParams.offset = 0;
3641
3642 memAllocParams.size = length;
3643 memAllocParams.type = NVOS32_TYPE_IMAGE;
3644 memAllocParams.attr = isSystemMemory ?
3645 DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) :
3646 DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
3647
3648 // Always enable caching for System Memory as all the currently supported
3649 // platforms are IO coherent.
3650 memAllocParams.attr |= isSystemMemory ?
3651 DRF_DEF(OS32, _ATTR, _COHERENCY, _CACHED):
3652 DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);
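//
// DRF_DEF(d, r, f, c) expands to the named field value shifted into its bit
// position, so independent NVOS32 attribute fields compose with bitwise OR. A
// small self-contained sketch of the pattern (illustrative values only, not a
// new allocation path):
//
//     NvU32 attr = 0;
//     attr |= DRF_DEF(OS32, _ATTR, _LOCATION,    _VIDMEM);     // placement
//     attr |= DRF_DEF(OS32, _ATTR, _COHERENCY,   _UNCACHED);   // CPU coherency
//     attr |= DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS); // contiguity
//     attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE,   _BIG);        // big pages
//
// The assignments below continue this pattern for contiguity and page size.
//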
3653
3654 // Allocate a contiguous allocation if requested by the client
3655 memAllocParams.attr |= allocInfo->bContiguousPhysAlloc ?
3656 DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS):
3657 DRF_DEF(OS32, _ATTR, _PHYSICALITY, _DEFAULT);
3658
3659 // Set the page size for the PA allocation. The RM default is the big page size.
3660 switch (allocInfo->pageSize)
3661 {
3662 case RM_PAGE_SIZE:
3663 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB);
3664 break;
3665 case RM_PAGE_SIZE_64K:
3666 case RM_PAGE_SIZE_128K:
3667 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG);
3668 break;
3669 case RM_PAGE_SIZE_HUGE:
3670 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
3671 memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB);
3672 break;
3673 case RM_PAGE_SIZE_512M:
3674 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE);
3675 memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB);
3676 break;
3677 default:
3678 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
3679 break;
3680 }
3681
3682 // Allocate at the top of FB if requested
3683 if (allocInfo->bMemGrowsDown)
3684 memAllocParams.flags |= NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN;
3685
3686 // Ask RM to allocate persistent video memory
3687 if (!isSystemMemory && allocInfo->bPersistentVidmem)
3688 memAllocParams.flags |= NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM;
3689
3690 //
3691 // Vid heap control has a different policy compared to other internal APIs:
3692 // it expects the GPU lock to not be held. This means we have to drop the GPU lock
3693 // here. It is safe in this scenario because we still have the API lock and nothing
3694 // from a GPU interrupt can change anything in the OPS state.
3695 //
3696
3697 physHandle = NV01_NULL_OBJECT;
3698 NV_ASSERT_OK_OR_GOTO(status, pRmApi->Alloc(pRmApi,
3699 device->session->handle,
3700 isSystemMemory ? device->handle : device->subhandle,
3701 &physHandle,
3702 isSystemMemory ? NV01_MEMORY_SYSTEM : NV01_MEMORY_LOCAL_USER,
3703 &memAllocParams), done);
3704 if (allocInfo->bContiguousPhysAlloc)
3705 allocInfo->gpuPhysOffset = memAllocParams.offset;
3706
3707 allocInfo->hPhysHandle = physHandle;
3708 *paOffset = (NvU64)allocInfo->gpuPhysOffset;
3709
3710 done:
3711
3712 if (status != NV_OK)
3713 pRmApi->Free(pRmApi, device->session->handle, physHandle);
3714
3715 return status;
3716 }
3717
3718 // This call allocates virtual memory and associates a PA with it.
3719 static NV_STATUS nvGpuOpsAllocVirtual(struct gpuAddressSpace *vaSpace,
3720 NvLength length,
3721 NvU64 *vaOffset,
3722 NvHandle physHandle,
3723 struct allocFlags flags,
3724 gpuVaAllocInfo *allocInfo)
3725 {
3726 NV_MEMORY_ALLOCATION_PARAMS memAllocParams = { 0 };
3727 NV_STATUS status;
3728 gpuMemDesc *memDesc = NULL;
3729 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
3730
3731 NV_ASSERT(allocInfo);
3732 NV_ASSERT(vaSpace);
3733 NV_ASSERT(vaOffset);
3734 NV_ASSERT(physHandle);
3735
3736 memDesc = portMemAllocNonPaged(sizeof(*memDesc));
3737 if (memDesc == NULL)
3738 return NV_ERR_NO_MEMORY;
3739
3740 // First allocate the virtual memory.
3741
3742 memAllocParams.owner = HEAP_OWNER_RM_KERNEL_CLIENT;
3743 memAllocParams.size = length;
3744 memAllocParams.type = NVOS32_TYPE_IMAGE;
3745 memAllocParams.alignment = allocInfo->alignment ?
allocInfo->alignment : NV_GPU_SMALL_PAGESIZE; 3746 memAllocParams.flags = NVOS32_ALLOC_FLAGS_VIRTUAL | 3747 NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE | 3748 NVOS32_ALLOC_FLAGS_ALLOCATE_KERNEL_PRIVILEGED; 3749 3750 // Set pageSize for VA-allocation. RM default is Big page size 3751 switch (allocInfo->pageSize) 3752 { 3753 case RM_PAGE_SIZE: 3754 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB); 3755 break; 3756 case RM_PAGE_SIZE_64K: 3757 case RM_PAGE_SIZE_128K: 3758 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG); 3759 break; 3760 case RM_PAGE_SIZE_HUGE: 3761 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE); 3762 memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB); 3763 break; 3764 case RM_PAGE_SIZE_512M: 3765 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE); 3766 memAllocParams.attr2 |= DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB); 3767 break; 3768 default: 3769 memAllocParams.attr |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT); 3770 break; 3771 } 3772 3773 memAllocParams.hVASpace = vaSpace->handle; 3774 3775 memDesc->handle = NV01_NULL_OBJECT; 3776 NV_ASSERT_OK_OR_GOTO(status, pRmApi->Alloc(pRmApi, 3777 vaSpace->device->session->handle, 3778 vaSpace->device->handle, 3779 &memDesc->handle, 3780 NV50_MEMORY_VIRTUAL, 3781 &memAllocParams), done); 3782 memDesc->address = (NvU64)memAllocParams.offset; 3783 memDesc->size = length; 3784 memDesc->childHandle = physHandle; 3785 3786 portSyncRwLockAcquireWrite(vaSpace->allocationsLock); 3787 status = trackDescriptor(&vaSpace->allocations, memDesc->address, memDesc); 3788 portSyncRwLockReleaseWrite(vaSpace->allocationsLock); 3789 3790 if (status != NV_OK) 3791 goto done; 3792 3793 // return the allocated GPU VA 3794 *vaOffset = memDesc->address; 3795 3796 done: 3797 3798 if (status != NV_OK) 3799 pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->handle); 3800 3801 if ((status != NV_OK) && (memDesc != NULL)) 3802 portMemFree(memDesc); 3803 3804 return status; 3805 } 3806 3807 // will need to support offset within allocation 3808 static NV_STATUS nvGpuOpsMapGpuMemory(struct gpuAddressSpace *vaSpace, 3809 NvU64 vaOffset, 3810 NvLength length, 3811 NvU64 pageSize, 3812 NvU64 *gpuOffset, 3813 struct allocFlags flags) 3814 { 3815 gpuMemDesc *memDescVa = NULL; 3816 NV_STATUS status; 3817 NvU64 mappedVa = 0; 3818 NvU32 mapFlags = 0; 3819 NvU64 mapPageSize = 0; 3820 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 3821 3822 if (!vaSpace || !gpuOffset) 3823 return NV_ERR_INVALID_ARGUMENT; 3824 3825 portSyncRwLockAcquireRead(vaSpace->allocationsLock); 3826 status = findDescriptor(vaSpace->allocations, vaOffset, (void**)&memDescVa); 3827 portSyncRwLockReleaseRead(vaSpace->allocationsLock); 3828 if (status != NV_OK) 3829 return status; 3830 3831 NV_ASSERT(memDescVa); 3832 NV_ASSERT(memDescVa->handle); 3833 NV_ASSERT(memDescVa->childHandle); 3834 3835 if (pageSize == RM_PAGE_SIZE) 3836 { 3837 mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _4KB); 3838 } 3839 else if (pageSize == RM_PAGE_SIZE_HUGE) 3840 { 3841 // TODO: this flag is ignored, remove it once it is deprecated 3842 mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _HUGE); 3843 } 3844 else 3845 { 3846 mapPageSize |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _DEFAULT); 3847 } 3848 3849 // map the 2 surfaces 3850 mapFlags |= ((flags.bGetKernelVA) ? 
DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE) : 3851 DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _NONE)); 3852 mapFlags |= mapPageSize; 3853 3854 // Always enable snooping as that's what's needed for sysmem allocations and 3855 // it's ignored for vidmem. 3856 mapFlags |= DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE); 3857 3858 // map the 2 surfaces 3859 status = pRmApi->Map(pRmApi, 3860 vaSpace->device->session->handle, 3861 vaSpace->device->handle, 3862 memDescVa->handle, 3863 memDescVa->childHandle, 3864 0, 3865 length, 3866 mapFlags, 3867 &mappedVa); 3868 if (status != NV_OK) 3869 return status; 3870 3871 NV_ASSERT(memDescVa->address == mappedVa); 3872 3873 *gpuOffset = memDescVa->address; 3874 3875 return NV_OK; 3876 } 3877 3878 // 3879 // This function provides a gpu virtual address to a physical region 3880 // that can either be in sysmem or vidmem. 3881 // 3882 static NV_STATUS nvGpuOpsGpuMalloc(struct gpuAddressSpace *vaSpace, 3883 NvBool isSystemMemory, 3884 NvLength length, 3885 NvU64 *gpuOffset, 3886 struct allocFlags flags, 3887 gpuAllocInfo *allocInfo) 3888 { 3889 NV_STATUS status; 3890 NvU64 vaOffset = 0; 3891 NvU64 paOffset = 0; 3892 gpuVaAllocInfo vaAllocInfo = { 0 }; 3893 NvHandle paMemDescHandle; 3894 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 3895 3896 NV_ASSERT(allocInfo); 3897 NV_ASSERT(vaSpace); 3898 NV_ASSERT(gpuOffset); 3899 3900 // Allocate physical memory first. So that we can associate PA with the memDesc of VA. 3901 // This simplifies tracking of VA and PA handles. 3902 status = nvGpuOpsAllocPhysical(vaSpace->device, isSystemMemory, length, 3903 &paOffset, allocInfo); 3904 if (status != NV_OK) 3905 return status; 3906 3907 NV_ASSERT(allocInfo->hPhysHandle); 3908 3909 paMemDescHandle = allocInfo->hPhysHandle; 3910 vaAllocInfo.pageSize = allocInfo->pageSize; 3911 vaAllocInfo.alignment = allocInfo->alignment; 3912 3913 status = nvGpuOpsAllocVirtual(vaSpace, length, &vaOffset, paMemDescHandle, 3914 flags, &vaAllocInfo); 3915 if (status != NV_OK) 3916 goto cleanup_physical; 3917 3918 status = nvGpuOpsMapGpuMemory(vaSpace, vaOffset, length, 3919 allocInfo->pageSize, gpuOffset, flags); 3920 if (status != NV_OK) 3921 goto cleanup_virtual; 3922 3923 return NV_OK; 3924 3925 cleanup_virtual: 3926 nvGpuOpsFreeVirtual(vaSpace, vaOffset); 3927 cleanup_physical: 3928 pRmApi->Free(pRmApi, vaSpace->device->session->handle, paMemDescHandle); 3929 return status; 3930 } 3931 3932 static void nvGpuOpsFreeVirtual(struct gpuAddressSpace *vaSpace, NvU64 vaOffset) 3933 { 3934 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 3935 gpuMemDesc *memDescVa = NULL; 3936 portSyncRwLockAcquireWrite(vaSpace->allocationsLock); 3937 deleteDescriptor(&vaSpace->allocations, vaOffset, (void**)&memDescVa); 3938 portSyncRwLockReleaseWrite(vaSpace->allocationsLock); 3939 NV_ASSERT(memDescVa); 3940 pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDescVa->handle); 3941 portMemFree(memDescVa); 3942 } 3943 3944 NV_STATUS nvGpuOpsMemoryAllocFb(struct gpuAddressSpace *vaSpace, 3945 NvLength length, 3946 NvU64 *gpuOffset, 3947 gpuAllocInfo *allocInfo) 3948 { 3949 gpuAllocInfo allocInfoTemp = {0}; 3950 gpuAllocInfo *pAllocInfo; 3951 struct allocFlags flags = {0}; 3952 3953 if (!vaSpace || !gpuOffset) 3954 return NV_ERR_INVALID_ARGUMENT; 3955 3956 // Use default settings if user hasn't provided one. 
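//
// nvGpuOpsAllocPhysical(), nvGpuOpsAllocVirtual() and nvGpuOpsMapGpuMemory()
// combine to give the entry points below malloc-like semantics. An
// illustrative caller (error handling elided; the size and the NULL allocInfo
// are arbitrary choices for this sketch):
//
//     NvU64 gpuVa = 0;
//     status = nvGpuOpsMemoryAllocFb(vaSpace, 64 * 1024, &gpuVa, NULL);
//     if (status == NV_OK)
//     {
//         // ... use gpuVa in GPU work ...
//         nvGpuOpsMemoryFree(vaSpace, gpuVa);
//     }
//
// A NULL allocInfo is what the fallback below supports: a zeroed gpuAllocInfo
// selects the default page size with no contiguity requirement.
//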
3957 if (allocInfo == NULL) 3958 { 3959 pAllocInfo = &allocInfoTemp; 3960 } 3961 else 3962 { 3963 pAllocInfo = allocInfo; 3964 } 3965 3966 return nvGpuOpsGpuMalloc(vaSpace, NV_FALSE, length, gpuOffset, flags, 3967 pAllocInfo); 3968 } 3969 3970 NV_STATUS nvGpuOpsMemoryAllocSys(struct gpuAddressSpace *vaSpace, 3971 NvLength length, 3972 NvU64 *gpuOffset, 3973 gpuAllocInfo *allocInfo) 3974 { 3975 gpuAllocInfo allocInfoTemp = {0}; 3976 gpuAllocInfo *pAllocInfo; 3977 struct allocFlags flags = {0}; 3978 3979 if (!vaSpace || !gpuOffset) 3980 return NV_ERR_INVALID_ARGUMENT; 3981 3982 // Use default settings if user hasn't provided one. 3983 if (allocInfo == NULL) 3984 { 3985 pAllocInfo = &allocInfoTemp; 3986 } 3987 else 3988 { 3989 pAllocInfo = allocInfo; 3990 } 3991 3992 return nvGpuOpsGpuMalloc(vaSpace, NV_TRUE, length, gpuOffset, flags, 3993 pAllocInfo); 3994 } 3995 3996 NV_STATUS nvGpuOpsMemoryReopen(struct gpuAddressSpace *vaSpace, 3997 NvHandle hSrcClient, 3998 NvHandle hSrcAllocation, 3999 NvLength length, 4000 NvU64 *gpuOffset) 4001 { 4002 NV_STATUS status; 4003 NvHandle hAllocation = 0; 4004 gpuVaAllocInfo allocInfoTemp = { 0 }; 4005 struct allocFlags flags = { 0 }; 4006 NvU64 vaOffset; 4007 NvHandle hVirtual = 0; 4008 RsResourceRef *pResourceRef; 4009 NvU64 addressOffset = 0; 4010 NvHandle hParent; 4011 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 4012 4013 // find device type 4014 // TODO: Acquired because serverutilGetResourceRef expects RMAPI lock. Necessary? 4015 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS); 4016 if (status != NV_OK) 4017 return status; 4018 4019 status = serverutilGetResourceRef(hSrcClient, hSrcAllocation, &pResourceRef); 4020 if (status != NV_OK) 4021 { 4022 rmapiLockRelease(); 4023 return NV_ERR_OBJECT_NOT_FOUND; 4024 } 4025 4026 if (!dynamicCast(pResourceRef->pResource, Memory)) 4027 { 4028 rmapiLockRelease(); 4029 return NV_ERR_INVALID_OBJECT_HANDLE; 4030 } 4031 4032 hParent = pResourceRef->pParentRef ? 
pResourceRef->pParentRef->hResource : 0;
4033
4034 status = serverutilGetResourceRef(hSrcClient, hParent, &pResourceRef);
4035 rmapiLockRelease();
4036 if (status != NV_OK || !dynamicCast(pResourceRef->pResource, Device))
4037 return NV_ERR_GENERIC;
4038
4039 if (!vaSpace || !gpuOffset || !hSrcAllocation || !hSrcClient)
4040 return NV_ERR_INVALID_ARGUMENT;
4041
4042 // Dup the physical memory object
4043 hAllocation = NV01_NULL_OBJECT;
4044 status = pRmApi->DupObject(pRmApi,
4045 vaSpace->device->session->handle,
4046 vaSpace->device->handle,
4047 &hAllocation,
4048 hSrcClient,
4049 hSrcAllocation,
4050 NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE);
4051 if (status != NV_OK)
4052 return status;
4053
4054 // Associate the duped object with the newly created virtual memory object
4055 status = nvGpuOpsAllocVirtual(vaSpace, length, &vaOffset, hAllocation,
4056 flags, &allocInfoTemp);
4057 if (status != NV_OK)
4058 goto cleanup_dup;
4059
4060 status = getHandleForVirtualAddr(vaSpace, vaOffset, NV_FALSE, &hVirtual);
4061 if (status != NV_OK)
4062 goto cleanup_virt_allocation;
4063
4064 // Map the memory
4065 status = pRmApi->Map(pRmApi,
4066 vaSpace->device->session->handle,
4067 vaSpace->device->handle,
4068 hVirtual,
4069 hAllocation,
4070 0,
4071 length,
4072 0,
4073 &addressOffset);
4074 if (status != NV_OK)
4075 goto cleanup_virt_allocation;
4076
4077 NV_ASSERT((vaOffset == addressOffset) && "nvGpuOpsMemoryReopen: VA offset mismatch!");
4078
4079 // Return the mapped GPU pointer
4080 *gpuOffset = vaOffset;
4081
4082 return NV_OK;
4083
4084 cleanup_virt_allocation:
4085 nvGpuOpsFreeVirtual(vaSpace, vaOffset);
4086 cleanup_dup:
4087 pRmApi->Free(pRmApi, vaSpace->device->session->handle, hAllocation);
4088 return status;
4089 }
4090
4091 NV_STATUS nvGpuOpsPmaAllocPages(void *pPma, NvLength pageCount, NvU64 pageSize,
4092 gpuPmaAllocationOptions *pPmaAllocOptions,
4093 NvU64 *pPages)
4094 {
4095 NV_STATUS status;
4096 gpuPmaAllocationOptions pmaAllocOptionsTemp = {0};
4097 gpuPmaAllocationOptions *pAllocInfo;
4098 THREAD_STATE_NODE threadState;
4099
4100 if (!pPma || !pPages)
4101 return NV_ERR_INVALID_ARGUMENT;
4102
4103 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4104
4105 // Use default settings if the user hasn't provided any.
4106 if (NULL == pPmaAllocOptions)
4107 {
4108 pAllocInfo = &pmaAllocOptionsTemp;
4109 }
4110 else
4111 {
4112 pAllocInfo = pPmaAllocOptions;
4113 }
4114
4115 // Invoke the PMA module to allocate pages.
4116 status = pmaAllocatePages((PMA *)pPma,
4117 pageCount,
4118 pageSize,
4119 (PMA_ALLOCATION_OPTIONS *)pAllocInfo,
4120 pPages);
4121
4122 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4123 return status;
4124 }
4125
4126 //
4127 // When this API is called from UVM as part of PMA eviction, the thread state
4128 // should have been initialized already and recursive re-init needs to be
4129 // skipped as it's not supported.
4130 //
4131 NV_STATUS nvGpuOpsPmaPinPages(void *pPma,
4132 NvU64 *pPages,
4133 NvLength pageCount,
4134 NvU64 pageSize,
4135 NvU32 flags)
4136 {
4137 NV_STATUS status;
4138 THREAD_STATE_NODE threadState;
4139 NvBool pmaEvictionCall = (flags & UVM_PMA_CALLED_FROM_PMA_EVICTION) != 0;
4140
4141 if (!pPma || !pPages)
4142 return NV_ERR_INVALID_ARGUMENT;
4143
4144 if (!pmaEvictionCall)
4145 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4146
4147 // Invoke the PMA module to pin pages.
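//
// The PMA entry points in this file form a simple lifecycle. A condensed,
// illustrative sequence (the page count, page size and flag values are
// placeholders; real callers pass whichever UVM_PMA_* flags apply):
//
//     NvU64 pages[16];
//     gpuPmaAllocationOptions opts = {0};
//     status = nvGpuOpsPmaAllocPages(pPma, 16, 64 * 1024, &opts, pages);
//     if (status == NV_OK)
//     {
//         status = nvGpuOpsPmaPinPages(pPma, pages, 16, 64 * 1024, 0);
//         // ... pinned pages cannot be evicted while in use ...
//         nvGpuOpsPmaUnpinPages(pPma, pages, 16, 64 * 1024);
//         nvGpuOpsPmaFreePages(pPma, pages, 16, 64 * 1024, 0);
//     }
//
// The pmaPinPages() call below performs the pin for this entry point.
//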
4148 status = pmaPinPages((PMA *)pPma, pPages, pageCount, pageSize);
4149
4150 if (!pmaEvictionCall)
4151 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4152 return status;
4153 }
4154
4155 NV_STATUS nvGpuOpsPmaUnpinPages(void *pPma,
4156 NvU64 *pPages,
4157 NvLength pageCount,
4158 NvU64 pageSize)
4159 {
4160 NV_STATUS status;
4161 THREAD_STATE_NODE threadState;
4162 // Validate the arguments before initializing the thread state so that the
4163 // early return cannot leak it.
4164 if (!pPma || !pPages)
4165 return NV_ERR_INVALID_ARGUMENT;
4166 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4167 // Invoke the PMA module to unpin pages.
4168 status = pmaUnpinPages((PMA *)pPma, pPages, pageCount, pageSize);
4169
4170 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4171 return status;
4172 }
4173
4174 //
4175 // When this API is called from UVM as part of PMA eviction, the thread state
4176 // should have been initialized already and recursive re-init needs to be
4177 // skipped as it's not supported.
4178 //
4179 void nvGpuOpsPmaFreePages(void *pPma,
4180 NvU64 *pPages,
4181 NvLength pageCount,
4182 NvU64 pageSize,
4183 NvU32 flags)
4184 {
4185 THREAD_STATE_NODE threadState;
4186 NvU32 pmaFreeFlag = ((flags & UVM_PMA_FREE_IS_ZERO) ? PMA_FREE_SKIP_SCRUB : 0);
4187 NvBool pmaEvictionCall = (flags & UVM_PMA_CALLED_FROM_PMA_EVICTION) != 0;
4188
4189 // Validate the arguments before touching the thread state.
4190 if (!pPma || !pPages)
4191 return;
4192
4193 if (!pmaEvictionCall)
4194 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
4195 // Invoke the PMA module to free pages.
4196 if (flags & UVM_PMA_ALLOCATE_CONTIGUOUS)
4197 pmaFreePages((PMA *)pPma, pPages, 1, pageCount * pageSize, pmaFreeFlag);
4198 else
4199 pmaFreePages((PMA *)pPma, pPages, pageCount, pageSize, pmaFreeFlag);
4200
4201 if (!pmaEvictionCall)
4202 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
4203 }
4204
4205 static NV_STATUS nvGpuOpsChannelGetHwChannelId(struct gpuChannel *channel,
4206 NvU32 *hwChannelId)
4207 {
4208 NV0080_CTRL_FIFO_GET_CHANNELLIST_PARAMS params = {0};
4209 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4210
4211 params.numChannels = 1;
4212 params.pChannelHandleList = NV_PTR_TO_NvP64(&channel->channelHandle);
4213 params.pChannelList = NV_PTR_TO_NvP64(hwChannelId);
4214
4215 return pRmApi->Control(pRmApi,
4216 channel->vaSpace->device->session->handle,
4217 channel->vaSpace->device->handle,
4218 NV0080_CTRL_CMD_FIFO_GET_CHANNELLIST,
4219 &params,
4220 sizeof(params));
4221 }
4222
4223 static void gpuDeviceUnmapCpuFreeHandle(struct gpuDevice *device,
4224 NvHandle handle,
4225 void *ptr,
4226 NvU32 flags)
4227 {
4228 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4229 struct gpuSession *session = device->session;
4230
4231 // Unmap the pointer
4232 if (ptr)
4233 {
4234 NV_STATUS status;
4235 const NvU32 pid = osGetCurrentProcess();
4236
4237 status = pRmApi->UnmapFromCpu(pRmApi, session->handle, device->subhandle, handle, ptr, flags, pid);
4238 NV_ASSERT(status == NV_OK);
4239 }
4240
4241 // Free the handle
4242 if (handle)
4243 pRmApi->Free(pRmApi, session->handle, handle);
4244 }
4245
4246 static void gpuDeviceDestroyUsermodeRegion(struct gpuDevice *device)
4247 {
4248 subDeviceDesc *rmSubDevice = device->rmSubDevice;
4249
4250 gpuDeviceUnmapCpuFreeHandle(device,
4251 rmSubDevice->clientRegionHandle,
4252 (void *)rmSubDevice->clientRegionMapping,
4253 0);
4254 }
4255
4256 static NV_STATUS gpuDeviceMapUsermodeRegion(struct gpuDevice *device)
4257 {
4258 NV_STATUS status = NV_OK;
4259 NvHandle regionHandle = 0;
4260 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4261 struct gpuSession *session =
device->session;
4262 subDeviceDesc *rmSubDevice = device->rmSubDevice;
4263 NvU32 usermodeClass = VOLTA_USERMODE_A;
4264 void *pParams = NULL;
4265 NV_HOPPER_USERMODE_A_PARAMS hopperParams =
4266 {
4267 .bBar1Mapping = NV_TRUE,
4268 .bPriv = NV_FALSE
4269 };
4270
4271 if (device->rmDevice->arch >= GPU_ARCHITECTURE_HOPPER)
4272 {
4273 usermodeClass = HOPPER_USERMODE_A;
4274 pParams = &hopperParams;
4275 }
4276
4277 NV_ASSERT(isDeviceVoltaPlus(device));
4278 NV_ASSERT(rmSubDevice->clientRegionHandle == 0 && rmSubDevice->clientRegionMapping == NULL);
4279
4280 regionHandle = NV01_NULL_OBJECT;
4281 status = pRmApi->Alloc(pRmApi,
4282 session->handle,
4283 device->subhandle,
4284 &regionHandle,
4285 usermodeClass,
4286 pParams);
4287 if (NV_OK != status)
4288 return status;
4289
4290 status = pRmApi->MapToCpu(pRmApi,
4291 session->handle,
4292 device->subhandle,
4293 regionHandle,
4294 0,
4295 NVC361_NV_USERMODE__SIZE,
4296 (void **)(&rmSubDevice->clientRegionMapping),
4297 DRF_DEF(OS33, _FLAGS, _ACCESS, _WRITE_ONLY));
4298 if (NV_OK != status)
4299 goto failure_case;
4300
4301 rmSubDevice->clientRegionHandle = regionHandle;
4302 return status;
4303
4304 failure_case:
4305 pRmApi->Free(pRmApi, device->session->handle, regionHandle);
4306 return status;
4307 }
4308
4309 //
4310 // In Volta+, a channel can submit work by "ringing a doorbell" on the GPU after
4311 // updating the GP_PUT. The doorbell is a register mapped in the client's address
4312 // space and can be shared by all channels in that address space. Each channel writes
4313 // a channel-specific token to the doorbell to trigger the work.
4314 //
4315 static NV_STATUS nvGpuOpsGetWorkSubmissionInfo(struct gpuAddressSpace *vaSpace,
4316 struct gpuChannel *channel)
4317 {
4318 NV_STATUS status = NV_OK;
4319 NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS params = {0};
4320 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
4321 struct gpuDevice *device = vaSpace->device;
4322 struct gpuSession *session = device->session;
4323 subDeviceDesc *rmSubDevice = device->rmSubDevice;
4324
4325 // Only valid for VOLTA+ (sub)Devices.
4326 NV_ASSERT(isDeviceVoltaPlus(vaSpace->device));
4327
4328 // Now get the token for submission on the given channel.
4329 status = pRmApi->Control(pRmApi,
4330 session->handle,
4331 channel->channelHandle,
4332 NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN,
4333 &params,
4334 sizeof(params));
4335 if (status != NV_OK)
4336 return status;
4337
4338 channel->workSubmissionOffset = (NvU32 *)((NvU8*)rmSubDevice->clientRegionMapping + NVC361_NOTIFY_CHANNEL_PENDING);
4339 channel->workSubmissionToken = params.workSubmitToken;
4340
4341 //
4342 // pWorkSubmissionToken cannot be NULL even if errorNotifier is NULL.
4343 // errorNotifier is checked for NULL previously, so just an assert is
4344 // sufficient.
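//
// For reference, the submission sequence implied by the doorbell description
// above is roughly the following (an illustrative sketch; exact ordering and
// barrier requirements are owned by the UVM driver, channelInfo is the
// gpuChannelInfo returned at channel allocation, and putIndex/newGpPut are
// hypothetical values maintained by the caller):
//
//     channel->gpFifoEntries[putIndex] = gpEntry;  // publish the GPFIFO entry
//     *channelInfo->gpPut = newGpPut;              // advance GP_PUT in USERD
//     // ... memory barrier so the GPU sees GP_PUT before the doorbell ...
//     *channelInfo->workSubmissionOffset = channelInfo->workSubmissionToken;
//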
4345 // 4346 NV_ASSERT_OR_RETURN((channel->errorNotifier != NULL), NV_ERR_INVALID_POINTER); 4347 4348 channel->pWorkSubmissionToken = 4349 (NvU32 *)((NvU8 *)channel->errorNotifier + 4350 (NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN * sizeof(NvNotification)) + 4351 NV_OFFSETOF(NvNotification, info32)); 4352 4353 return status; 4354 } 4355 4356 static NvBool channelNeedsDummyAlloc(struct gpuChannel *channel) 4357 { 4358 return channel->gpPutLoc == UVM_BUFFER_LOCATION_SYS && deviceNeedsDummyAlloc(channel->vaSpace->device); 4359 } 4360 4361 static NV_STATUS channelRetainDummyAlloc(struct gpuChannel *channel, gpuChannelInfo *channelInfo) 4362 { 4363 struct gpuAddressSpace *vaSpace = channel->vaSpace; 4364 NV_STATUS status; 4365 4366 if (!channelNeedsDummyAlloc(channel)) 4367 return NV_OK; 4368 4369 status = nvGpuOpsVaSpaceRetainDummyAlloc(vaSpace); 4370 if (status != NV_OK) 4371 return status; 4372 4373 channel->retainedDummyAlloc = NV_TRUE; 4374 channelInfo->dummyBar1Mapping = vaSpace->dummyGpuAlloc.cpuAddr; 4375 4376 return NV_OK; 4377 } 4378 4379 static void channelReleaseDummyAlloc(struct gpuChannel *channel) 4380 { 4381 if (channel != NULL && channel->retainedDummyAlloc) 4382 { 4383 NV_ASSERT(channelNeedsDummyAlloc(channel)); 4384 nvGpuOpsVaSpaceReleaseDummyAlloc(channel->vaSpace); 4385 } 4386 } 4387 4388 static RM_ENGINE_TYPE channelEngineType(const struct gpuChannel *channel) 4389 { 4390 if (channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE) 4391 return RM_ENGINE_TYPE_COPY(channel->engineIndex); 4392 else if (channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2) 4393 return RM_ENGINE_TYPE_SEC2; 4394 else 4395 return RM_ENGINE_TYPE_GR(channel->engineIndex); 4396 } 4397 4398 static NV_STATUS channelAllocate(struct gpuAddressSpace *vaSpace, 4399 UVM_GPU_CHANNEL_ENGINE_TYPE engineType, 4400 const gpuChannelAllocParams *params, 4401 struct gpuChannel **channelHandle, 4402 gpuChannelInfo *channelInfo) 4403 { 4404 NV_STATUS status; 4405 struct gpuChannel *channel = NULL; 4406 struct gpuDevice *device = NULL; 4407 struct gpuSession *session = NULL; 4408 void *cpuMap = NULL; 4409 NvHandle hErrorNotifier; 4410 struct ChannelAllocInfo *pAllocInfo = NULL; 4411 void *gpfifoCtrl = NULL; 4412 PCLI_DMA_MAPPING_INFO pDmaMappingInfo = NULL; 4413 struct allocFlags flags = {0}; 4414 OBJGPU *pGpu = NULL; 4415 KernelFifo *pKernelFifo = NULL; 4416 NvU32 pid = osGetCurrentProcess(); 4417 NvU32 subdeviceInstance; 4418 UVM_BUFFER_LOCATION gpFifoLoc; 4419 UVM_BUFFER_LOCATION gpPutLoc; 4420 NvLength gpFifoSize, errorNotifierSize; 4421 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 4422 4423 if (!vaSpace || !channelHandle || !params || !channelInfo) 4424 return NV_ERR_INVALID_ARGUMENT; 4425 4426 if (params->numGpFifoEntries == 0) 4427 return NV_ERR_INVALID_ARGUMENT; 4428 4429 if (engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_CE && 4430 engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_GR && 4431 engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2) 4432 return NV_ERR_INVALID_ARGUMENT; 4433 4434 // TODO: Bug 2458492: Ampere-SMC Verify GR/CE indices within partition/SMC Engine 4435 4436 device = vaSpace->device; 4437 NV_ASSERT(device); 4438 session = device->session; 4439 NV_ASSERT(session); 4440 4441 // Set location defaults 4442 gpFifoLoc = UVM_BUFFER_LOCATION_SYS; 4443 if (device->fbInfo.bZeroFb) 4444 gpPutLoc = UVM_BUFFER_LOCATION_SYS; 4445 else 4446 gpPutLoc = UVM_BUFFER_LOCATION_VID; 4447 4448 if (isDeviceVoltaPlus(device)) 4449 { 4450 if (params->gpFifoLoc > UVM_BUFFER_LOCATION_VID) 4451 return 
NV_ERR_INVALID_ARGUMENT;
4452 if (params->gpPutLoc > UVM_BUFFER_LOCATION_VID)
4453 return NV_ERR_INVALID_ARGUMENT;
4454
4455 if (params->gpFifoLoc != UVM_BUFFER_LOCATION_DEFAULT)
4456 gpFifoLoc = params->gpFifoLoc;
4457 if (params->gpPutLoc != UVM_BUFFER_LOCATION_DEFAULT)
4458 gpPutLoc = params->gpPutLoc;
4459 }
4460 else
4461 {
4462 // GPFIFO needs to be placed in sysmem on Pascal and
4463 // pre-Pascal devices (Bug 1750713)
4464 if (params->gpFifoLoc != UVM_BUFFER_LOCATION_DEFAULT || params->gpPutLoc != UVM_BUFFER_LOCATION_DEFAULT)
4465 return NV_ERR_INVALID_ARGUMENT;
4466 }
4467
4468 // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
4469 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
4470 if (status != NV_OK)
4471 return status;
4472 status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
4473 rmapiLockRelease();
4474 if (status != NV_OK)
4475 return status;
4476
4477 pAllocInfo = portMemAllocNonPaged(sizeof(*pAllocInfo));
4478 if (pAllocInfo == NULL)
4479 return NV_ERR_NO_MEMORY;
4480
4481 portMemSet(pAllocInfo, 0, sizeof(*pAllocInfo));
4482
4483 subdeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
4484
4485 channel = portMemAllocNonPaged(sizeof(*channel));
4486 if (channel == NULL)
4487 {
4488 status = NV_ERR_NO_MEMORY;
4489 goto cleanup_free_memory;
4490 }
4491
4492 portMemSet(channel, 0, sizeof(*channel));
4493
4494 channel->vaSpace = vaSpace;
4495 channel->fifoEntries = params->numGpFifoEntries;
4496 channel->gpFifoLoc = gpFifoLoc;
4497 channel->gpPutLoc = gpPutLoc;
4498
4499 // Remember which engine we are using, so that RC recovery can reset it if
4500 // it hangs:
4501 channel->engineType = engineType;
4502 channel->engineIndex = params->engineIndex;
4503
4504 gpFifoSize = (NvLength)params->numGpFifoEntries * NVA06F_GP_ENTRY__SIZE;
4505
4506 // If the allocation is in vidmem, ask RM to allocate persistent vidmem
4507 pAllocInfo->gpuAllocInfo.bPersistentVidmem = NV_TRUE;
4508
4509 // 1. Allocate the GPFIFO entries. Don't pass any special flags.
4510 flags.bGetKernelVA = NV_FALSE;
4511 status = nvGpuOpsGpuMalloc(vaSpace,
4512 gpFifoLoc == UVM_BUFFER_LOCATION_SYS,
4513 gpFifoSize,
4514 &channel->gpFifo,
4515 flags,
4516 &pAllocInfo->gpuAllocInfo);
4517 if (status != NV_OK)
4518 goto cleanup_free_memory;
4519
4520 // 2. Map the GPFIFO entries
4521 status = nvGpuOpsMemoryCpuMap(vaSpace,
4522 channel->gpFifo,
4523 gpFifoSize,
4524 &cpuMap,
4525 PAGE_SIZE_DEFAULT);
4526 if (status != NV_OK)
4527 goto cleanup_free_gpfifo_entries;
4528
4529 channel->gpFifoEntries = (NvU64 *) cpuMap;
4530
4531 //
4532 // 3. Allocate memory for the error notifier. Make the allocation
4533 // sufficiently large to also accommodate any other channel
4534 // notifiers, and request a kernel VA and CPU caching.
4535 //
4536
4537 flags.bGetKernelVA = NV_TRUE;
4538 errorNotifierSize = sizeof(NvNotification) *
4539 NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1;
4540 status = nvGpuOpsGpuMalloc(vaSpace,
4541 NV_TRUE,
4542 errorNotifierSize,
4543 &channel->errorNotifierOffset,
4544 flags,
4545 &pAllocInfo->gpuAllocInfo);
4546 if (status != NV_OK)
4547 goto cleanup_unmap_gpfifo_entries;
4548
4549 NV_ASSERT(channel->errorNotifierOffset);
4550
4551 status = getHandleForVirtualAddr(vaSpace,
4552 channel->errorNotifierOffset,
4553 NV_FALSE /*virtual*/,
4554 &hErrorNotifier);
4555 if (status != NV_OK)
4556 goto cleanup_free_virtual;
4557
4558 // 4.
Find and share the VA with UVM driver 4559 4560 // TODO: Acquired because CliGetDmaMappingInfo expects RMAPI lock. Necessary? 4561 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS); 4562 if (status != NV_OK) 4563 goto cleanup_free_virtual; 4564 4565 if (!CliGetDmaMappingInfo(session->handle, 4566 device->handle, 4567 hErrorNotifier, 4568 channel->errorNotifierOffset, 4569 gpumgrGetDeviceGpuMask(device->deviceInstance), 4570 &pDmaMappingInfo)) 4571 { 4572 rmapiLockRelease(); 4573 status = NV_ERR_GENERIC; 4574 goto cleanup_free_virtual; 4575 } 4576 4577 rmapiLockRelease(); 4578 4579 // 4580 // RM uses the parent subdevice index to fill the notifier on SYSMEM. So use the same. 4581 // NOTE: the same assumption does not hold for VIDMEM allocations. 4582 // 4583 channel->errorNotifier = (NvNotification*)pDmaMappingInfo->KernelVAddr[subdeviceInstance]; 4584 if (!channel->errorNotifier) 4585 { 4586 status = NV_ERR_GENERIC; 4587 goto cleanup_free_virtual; 4588 } 4589 4590 // Let's allocate the channel 4591 pAllocInfo->gpFifoAllocParams.hObjectError = hErrorNotifier; 4592 status = getHandleForVirtualAddr(vaSpace, 4593 channel->gpFifo, 4594 NV_FALSE /*virtual*/, 4595 &pAllocInfo->gpFifoAllocParams.hObjectBuffer); 4596 if (status != NV_OK) 4597 goto cleanup_free_virtual; 4598 4599 pAllocInfo->gpFifoAllocParams.gpFifoOffset = channel->gpFifo; 4600 pAllocInfo->gpFifoAllocParams.gpFifoEntries = channel->fifoEntries; 4601 // If zero then it will attach to the device address space 4602 pAllocInfo->gpFifoAllocParams.hVASpace = vaSpace->handle; 4603 pAllocInfo->gpFifoAllocParams.engineType = gpuGetNv2080EngineType(channelEngineType(channel)); 4604 4605 if (isDeviceVoltaPlus(device)) 4606 { 4607 4608 flags.bGetKernelVA = NV_FALSE; 4609 status = nvGpuOpsGpuMalloc(vaSpace, 4610 gpPutLoc == UVM_BUFFER_LOCATION_SYS, 4611 sizeof(KeplerAControlGPFifo), 4612 &channel->userdGpuAddr, 4613 flags, 4614 &pAllocInfo->gpuAllocInfo); 4615 if (status != NV_OK) 4616 goto cleanup_free_virtual; 4617 4618 channel->hUserdPhysHandle = pAllocInfo->gpuAllocInfo.hPhysHandle; 4619 4620 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY) 4621 pAllocInfo->gpFifoAllocParams.hUserdMemory[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] = channel->hUserdPhysHandle; 4622 pAllocInfo->gpFifoAllocParams.userdOffset[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] = 0; 4623 SLI_LOOP_END 4624 4625 status = nvGpuOpsMemoryCpuMap(vaSpace, 4626 channel->userdGpuAddr, 4627 sizeof(KeplerAControlGPFifo), 4628 &gpfifoCtrl, 4629 PAGE_SIZE_DEFAULT); 4630 if (status != NV_OK) 4631 goto cleanup_free_virtual; 4632 } 4633 4634 channel->channelHandle = NV01_NULL_OBJECT; 4635 status = pRmApi->Alloc(pRmApi, session->handle, 4636 device->handle, 4637 &channel->channelHandle, 4638 device->hostClass, 4639 &pAllocInfo->gpFifoAllocParams); 4640 if (status != NV_OK) 4641 { 4642 goto cleanup_free_virtual; 4643 } 4644 4645 // Query runlist ID 4646 pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); 4647 status = kfifoEngineInfoXlate_HAL(pGpu, 4648 pKernelFifo, 4649 ENGINE_INFO_TYPE_RM_ENGINE_TYPE, 4650 (NvU32)channelEngineType(channel), 4651 ENGINE_INFO_TYPE_RUNLIST, 4652 &channel->hwRunlistId); 4653 if (status != NV_OK) 4654 goto cleanup_free_virtual; 4655 4656 // Query channel ID 4657 status = nvGpuOpsChannelGetHwChannelId(channel, &channel->hwChannelId); 4658 if (status != NV_OK) 4659 goto cleanup_free_channel; 4660 4661 // Map USERD (controlPage) 4662 if (!isDeviceVoltaPlus(device)) 4663 { 4664 status = pRmApi->MapToCpu(pRmApi, 4665 session->handle, 4666 device->subhandle, 4667 
channel->channelHandle, 4668 0, 4669 sizeof(KeplerAControlGPFifo), 4670 &gpfifoCtrl, 4671 0); 4672 if (status != NV_OK) 4673 goto cleanup_free_channel; 4674 } 4675 4676 channel->controlPage = gpfifoCtrl; 4677 4678 status = channelRetainDummyAlloc(channel, channelInfo); 4679 if (status != NV_OK) 4680 goto cleanup_free_controlpage; 4681 4682 // Allocate the SW method class for fault cancel 4683 if (isDevicePascalPlus(device) && (engineType != UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2)) 4684 { 4685 channel->hFaultCancelSwMethodClass = NV01_NULL_OBJECT; 4686 status = pRmApi->Alloc(pRmApi, 4687 session->handle, 4688 channel->channelHandle, 4689 &channel->hFaultCancelSwMethodClass, 4690 GP100_UVM_SW, 4691 NULL); 4692 if (status != NV_OK) 4693 goto cleanup_free_controlpage; 4694 } 4695 4696 portMemFree(pAllocInfo); 4697 4698 *channelHandle = channel; 4699 channelInfo->gpGet = &channel->controlPage->GPGet; 4700 channelInfo->gpPut = &channel->controlPage->GPPut; 4701 channelInfo->gpFifoEntries = channel->gpFifoEntries; 4702 channelInfo->channelClassNum = device->hostClass; 4703 channelInfo->numGpFifoEntries = channel->fifoEntries; 4704 channelInfo->errorNotifier = channel->errorNotifier; 4705 channelInfo->hwRunlistId = channel->hwRunlistId; 4706 channelInfo->hwChannelId = channel->hwChannelId; 4707 4708 return NV_OK; 4709 4710 cleanup_free_controlpage: 4711 if (!isDeviceVoltaPlus(device) && (gpfifoCtrl != NULL)) 4712 pRmApi->UnmapFromCpu(pRmApi, session->handle, device->subhandle, channel->channelHandle, gpfifoCtrl, 0, pid); 4713 cleanup_free_channel: 4714 pRmApi->Free(pRmApi, session->handle, channel->channelHandle); 4715 cleanup_free_virtual: 4716 if (isDeviceVoltaPlus(device)) 4717 { 4718 if (gpfifoCtrl != NULL) 4719 nvGpuOpsMemoryCpuUnMap(vaSpace, gpfifoCtrl); 4720 4721 if (channel->userdGpuAddr != 0) 4722 nvGpuOpsMemoryFree(vaSpace, channel->userdGpuAddr); 4723 } 4724 4725 nvGpuOpsMemoryFree(vaSpace, channel->errorNotifierOffset); 4726 cleanup_unmap_gpfifo_entries: 4727 nvGpuOpsMemoryCpuUnMap(vaSpace, channel->gpFifoEntries); 4728 cleanup_free_gpfifo_entries: 4729 nvGpuOpsMemoryFree(vaSpace, channel->gpFifo); 4730 cleanup_free_memory: 4731 channelReleaseDummyAlloc(channel); 4732 portMemFree(channel); 4733 portMemFree(pAllocInfo); 4734 4735 return status; 4736 } 4737 4738 static NV_STATUS engineAllocate(struct gpuChannel *channel, gpuChannelInfo *channelInfo, UVM_GPU_CHANNEL_ENGINE_TYPE engineType) 4739 { 4740 NV_STATUS status = NV_OK; 4741 struct gpuObject *object = NULL; 4742 NVB0B5_ALLOCATION_PARAMETERS ceAllocParams = {0}; 4743 NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS channelGrpParams = {0}; 4744 struct gpuAddressSpace *vaSpace = NULL; 4745 struct gpuDevice *device = NULL; 4746 struct gpuSession *session = NULL; 4747 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 4748 NvU32 class; 4749 void *params; 4750 4751 NV_ASSERT(channel); 4752 NV_ASSERT(channelInfo); 4753 NV_ASSERT(channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE || 4754 channel->engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2); 4755 4756 // TODO: Bug 2458492: Ampere-SMC Verify GR/CE indices within partition 4757 4758 vaSpace = channel->vaSpace; 4759 NV_ASSERT(vaSpace); 4760 device = vaSpace->device; 4761 NV_ASSERT(device); 4762 session = device->session; 4763 NV_ASSERT(session); 4764 4765 object = portMemAllocNonPaged(sizeof(*object)); 4766 if (object == NULL) 4767 return NV_ERR_NO_MEMORY; 4768 4769 object->handle = NV01_NULL_OBJECT; 4770 4771 if (engineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE) 4772 { 4773 ceAllocParams.version = 
NVB0B5_ALLOCATION_PARAMETERS_VERSION_1; 4774 ceAllocParams.engineType = NV2080_ENGINE_TYPE_COPY(channel->engineIndex); 4775 params = &ceAllocParams; 4776 class = device->ceClass; 4777 } 4778 else 4779 { 4780 params = NULL; 4781 class = device->sec2Class; 4782 } 4783 4784 status = pRmApi->Alloc(pRmApi, session->handle, 4785 channel->channelHandle, 4786 &object->handle, 4787 class, 4788 params); 4789 4790 if (status != NV_OK) 4791 goto cleanup_free_memory; 4792 4793 // In volta+ gpus, the channel has a submission offset used as doorbell. 4794 if (isDeviceVoltaPlus(device)) 4795 { 4796 status = nvGpuOpsGetWorkSubmissionInfo(vaSpace, channel); 4797 if (status != NV_OK) 4798 goto cleanup_free_engine; 4799 4800 channelInfo->workSubmissionOffset = channel->workSubmissionOffset; 4801 channelInfo->workSubmissionToken = channel->workSubmissionToken; 4802 channelInfo->pWorkSubmissionToken = channel->pWorkSubmissionToken; 4803 } 4804 4805 // Schedule the channel 4806 channelGrpParams.bEnable = NV_TRUE; 4807 status = pRmApi->Control(pRmApi, 4808 session->handle, 4809 channel->channelHandle, 4810 NVA06F_CTRL_CMD_GPFIFO_SCHEDULE, 4811 &channelGrpParams, 4812 sizeof(channelGrpParams)); 4813 4814 if (status != NV_OK) 4815 goto cleanup_free_engine; 4816 4817 object->next = channel->nextAttachedEngine; 4818 channel->nextAttachedEngine = object; 4819 object->type = class; 4820 4821 return NV_OK; 4822 4823 cleanup_free_engine: 4824 pRmApi->Free(pRmApi, session->handle, object->handle); 4825 cleanup_free_memory: 4826 portMemFree(object); 4827 return status; 4828 } 4829 4830 NV_STATUS nvGpuOpsChannelAllocate(struct gpuAddressSpace *vaSpace, 4831 const gpuChannelAllocParams *params, 4832 struct gpuChannel **channelHandle, 4833 gpuChannelInfo *channelInfo) 4834 { 4835 NV_STATUS status; 4836 UVM_GPU_CHANNEL_ENGINE_TYPE channelType = params->engineType; 4837 4838 NV_ASSERT_OR_RETURN((channelType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE || channelType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2), NV_ERR_NOT_SUPPORTED); 4839 4840 status = channelAllocate(vaSpace, channelType, params, 4841 channelHandle, channelInfo); 4842 if (status != NV_OK) 4843 return status; 4844 4845 status = engineAllocate(*channelHandle, channelInfo, channelType); 4846 if (status != NV_OK) 4847 nvGpuOpsChannelDestroy(*channelHandle); 4848 4849 return status; 4850 } 4851 4852 void nvGpuOpsChannelDestroy(struct gpuChannel *channel) 4853 { 4854 struct gpuObject *nextEngine; 4855 struct gpuObject *currEngine; 4856 NvU32 pid = osGetCurrentProcess(); 4857 struct gpuAddressSpace *vaSpace = NULL; 4858 struct gpuDevice *device = NULL; 4859 struct gpuSession *session = NULL; 4860 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 4861 4862 if (!channel) 4863 return; 4864 4865 vaSpace = channel->vaSpace; 4866 NV_ASSERT(vaSpace); 4867 device = vaSpace->device; 4868 NV_ASSERT(device); 4869 session = device->session; 4870 NV_ASSERT(session); 4871 4872 // destroy the engines under this channel 4873 if (channel->nextAttachedEngine) 4874 { 4875 currEngine = channel->nextAttachedEngine; 4876 nextEngine = currEngine; 4877 do 4878 { 4879 currEngine = nextEngine; 4880 nextEngine = currEngine->next; 4881 pRmApi->Free(pRmApi, session->handle, currEngine->handle); 4882 portMemFree(currEngine); 4883 } while (nextEngine != NULL); 4884 } 4885 4886 // Tear down the channel 4887 if (isDevicePascalPlus(device)) 4888 pRmApi->Free(pRmApi, session->handle, channel->hFaultCancelSwMethodClass); 4889 4890 if (isDeviceVoltaPlus(device)) 4891 { 4892 nvGpuOpsMemoryCpuUnMap(vaSpace, (void 
*)channel->controlPage); 4893 nvGpuOpsMemoryFree(vaSpace, channel->userdGpuAddr); 4894 } 4895 else 4896 { 4897 pRmApi->UnmapFromCpu(pRmApi, 4898 session->handle, 4899 device->subhandle, 4900 channel->channelHandle, 4901 (void *)channel->controlPage, 4902 0, 4903 pid); 4904 } 4905 4906 // Free the channel 4907 pRmApi->Free(pRmApi, session->handle, channel->channelHandle); 4908 4909 nvGpuOpsMemoryFree(vaSpace, channel->errorNotifierOffset); 4910 4911 nvGpuOpsMemoryCpuUnMap(vaSpace, channel->gpFifoEntries); 4912 4913 nvGpuOpsMemoryFree(vaSpace, channel->gpFifo); 4914 4915 channelReleaseDummyAlloc(channel); 4916 4917 portMemFree(channel); 4918 } 4919 4920 static NV_STATUS trackDescriptor(PNODE *pRoot, NvU64 key, void *desc) 4921 { 4922 PNODE btreeNode; 4923 NV_ASSERT(desc); 4924 NV_ASSERT(pRoot); 4925 4926 btreeNode = (PNODE)desc; 4927 4928 btreeNode->keyStart = key; 4929 btreeNode->keyEnd = key; 4930 btreeNode->Data = desc; 4931 return btreeInsert(btreeNode, pRoot); 4932 } 4933 4934 static NV_STATUS findDescriptor(PNODE pRoot, NvU64 key, void **desc) 4935 { 4936 PNODE btreeNode = NULL; 4937 NV_STATUS status = NV_OK; 4938 4939 NV_ASSERT(desc); 4940 4941 status = btreeSearch(key, &btreeNode, pRoot); 4942 if (status != NV_OK) 4943 return status; 4944 4945 *desc = btreeNode->Data; 4946 return NV_OK; 4947 } 4948 4949 static NV_STATUS deleteDescriptor(PNODE *pRoot, NvU64 key, void **desc) 4950 { 4951 PNODE btreeNode = NULL; 4952 NV_STATUS status = NV_OK; 4953 4954 NV_ASSERT(desc); 4955 NV_ASSERT(pRoot); 4956 4957 status = btreeSearch(key, &btreeNode, *pRoot); 4958 if (status != NV_OK) 4959 return status ; 4960 4961 *desc = btreeNode->Data; 4962 status = btreeUnlink(btreeNode, pRoot); 4963 return NV_OK; 4964 } 4965 4966 static NV_STATUS destroyAllGpuMemDescriptors(NvHandle hClient, PNODE pNode) 4967 { 4968 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 4969 gpuMemDesc *memDesc = NULL; 4970 4971 if (pNode == NULL) 4972 return NV_OK; 4973 4974 destroyAllGpuMemDescriptors(hClient, pNode->left); 4975 destroyAllGpuMemDescriptors(hClient, pNode->right); 4976 4977 memDesc = (gpuMemDesc*)pNode->Data; 4978 if (memDesc->childHandle) 4979 pRmApi->Free(pRmApi, hClient, memDesc->childHandle); 4980 4981 if (memDesc->handle) 4982 pRmApi->Free(pRmApi, hClient, memDesc->handle); 4983 4984 portMemFree(pNode->Data); 4985 4986 return NV_OK; 4987 } 4988 4989 // Returns childHandle/handle to a VA memdesc associated with a VA. 4990 static NV_STATUS getHandleForVirtualAddr(struct gpuAddressSpace *vaSpace, 4991 NvU64 allocationAddress, 4992 NvBool bPhysical, 4993 NvHandle *pHandle) 4994 { 4995 NV_STATUS status = NV_OK; 4996 gpuMemDesc *memDesc = NULL; 4997 4998 NV_ASSERT(vaSpace); 4999 NV_ASSERT(pHandle); 5000 5001 portSyncRwLockAcquireRead(vaSpace->allocationsLock); 5002 status = findDescriptor(vaSpace->allocations, allocationAddress, (void**)&memDesc); 5003 portSyncRwLockReleaseRead(vaSpace->allocationsLock); 5004 if (status != NV_OK) 5005 return status; 5006 5007 NV_ASSERT(memDesc); 5008 5009 *pHandle = bPhysical ? 
memDesc->childHandle : memDesc->handle; 5010 5011 if (!*pHandle) 5012 return NV_ERR_GENERIC; 5013 5014 return NV_OK; 5015 } 5016 5017 // 5018 // Returns a cpu mapping to the provided GPU Offset 5019 // 5020 NV_STATUS nvGpuOpsMemoryCpuMap(struct gpuAddressSpace *vaSpace, 5021 NvU64 memory, 5022 NvLength length, 5023 void **cpuPtr, 5024 NvU64 pageSize) 5025 { 5026 gpuMemDesc *memDesc = NULL; 5027 cpuMappingDesc *cpuMapDesc = NULL; 5028 NV_STATUS status; 5029 void *pMappedAddr = NULL; 5030 NvP64 mappedAddr = 0; 5031 NvU32 flags = 0; 5032 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 5033 5034 if (!vaSpace || !cpuPtr) 5035 return NV_ERR_INVALID_ARGUMENT; 5036 5037 cpuMapDesc = portMemAllocNonPaged(sizeof(*cpuMapDesc)); 5038 if (cpuMapDesc == NULL) 5039 return NV_ERR_GENERIC; 5040 5041 portSyncRwLockAcquireRead(vaSpace->allocationsLock); 5042 status = findDescriptor(vaSpace->allocations, memory, (void**)&memDesc); 5043 portSyncRwLockReleaseRead(vaSpace->allocationsLock); 5044 if (status != NV_OK) 5045 goto cleanup_desc; 5046 5047 NV_ASSERT(memDesc); 5048 NV_ASSERT(memDesc->childHandle); 5049 5050 // 5051 // Set correct page size for Bar mappings. 5052 // 5053 if (pageSize == RM_PAGE_SIZE) 5054 { 5055 flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _4KB); 5056 } 5057 else if (pageSize == RM_PAGE_SIZE_HUGE) 5058 { 5059 // TODO: this flag is ignored, remove it once it is deprecated 5060 flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _HUGE); 5061 } 5062 else 5063 { 5064 flags |= DRF_DEF(OS46, _FLAGS, _PAGE_SIZE, _DEFAULT); 5065 } 5066 5067 // 5068 // If the length passed in is zero we will force the mapping 5069 // to the size that was used for allocation of the passed in 5070 // NvU64 5071 // 5072 status = pRmApi->MapToCpu(pRmApi, 5073 vaSpace->device->session->handle, 5074 vaSpace->device->subhandle, 5075 memDesc->childHandle, 5076 0, 5077 length != 0 ? 
length : memDesc->size,
5078 &pMappedAddr,
5079 flags);
5080 if (status != NV_OK)
5081 goto cleanup_desc;
5082
5083 mappedAddr = NV_PTR_TO_NvP64(pMappedAddr);
5084
5085 cpuMapDesc->cpuPointer = (NvUPtr) mappedAddr;
5086 cpuMapDesc->handle = memDesc->childHandle;
5087 cpuMapDesc->btreeNode.keyStart = (NvU64)cpuMapDesc->cpuPointer;
5088 cpuMapDesc->btreeNode.keyEnd = (NvU64)cpuMapDesc->cpuPointer;
5089 cpuMapDesc->btreeNode.Data = (void *) cpuMapDesc;
5090
5091 // Track CPU memdesc
5092 portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
5093 status = btreeInsert(&cpuMapDesc->btreeNode, &vaSpace->cpuMappings);
5094 portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
5095 if (status != NV_OK)
5096 goto cleanup_desc;
5097
5098 // This address can be used as the key because the BAR1 address space is unique.
5099 *cpuPtr = NvP64_VALUE(mappedAddr);
5100
5101 return NV_OK;
5102
5103 cleanup_desc:
5104 portMemFree(cpuMapDesc);
5105 return status;
5106 }
5107
5108 void nvGpuOpsMemoryCpuUnMap(struct gpuAddressSpace *vaSpace, void *cpuPtr)
5109 {
5110 unsigned pid = 0;
5111 cpuMappingDesc *mappingDesc = NULL;
5112 PNODE btreeNode;
5113 NV_STATUS status = NV_OK;
5114 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
5115
5116 if (!vaSpace || !cpuPtr)
5117 return;
5118
5119 portSyncRwLockAcquireRead(vaSpace->cpuMappingsLock);
5120 status = btreeSearch((NvUPtr)cpuPtr, &btreeNode, vaSpace->cpuMappings);
5121 portSyncRwLockReleaseRead(vaSpace->cpuMappingsLock);
5122 if (status != NV_OK)
5123 return;
5124
5125 mappingDesc = (cpuMappingDesc *)btreeNode->Data;
5126 if (mappingDesc)
5127 {
5128 pid = osGetCurrentProcess();
5129 status = pRmApi->UnmapFromCpu(pRmApi,
5130 vaSpace->device->session->handle,
5131 vaSpace->device->subhandle,
5132 mappingDesc->handle,
5133 NvP64_VALUE(((NvP64)mappingDesc->cpuPointer)),
5134 0,
5135 pid);
5136 NV_ASSERT(status == NV_OK);
5137 }
5138
5139 portSyncRwLockAcquireWrite(vaSpace->cpuMappingsLock);
5140 btreeUnlink(btreeNode, &vaSpace->cpuMappings);
5141 portSyncRwLockReleaseWrite(vaSpace->cpuMappingsLock);
5142
5143 portMemFree(mappingDesc);
5144 return;
5145 }
5146
5147 // This function frees both physical and virtual memory allocations.
5148 // It is the counterpart of nvGpuOpsGpuMalloc.
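//
// A typical pairing of the CPU-mapping helpers above with the allocator and
// with the free path below (illustrative; as noted above, a zero length maps
// the entire allocation):
//
//     void *cpuPtr = NULL;
//     status = nvGpuOpsMemoryCpuMap(vaSpace, gpuVa, 0, &cpuPtr, PAGE_SIZE_DEFAULT);
//     if (status == NV_OK)
//     {
//         // ... CPU reads and writes through cpuPtr ...
//         nvGpuOpsMemoryCpuUnMap(vaSpace, cpuPtr);
//     }
//     nvGpuOpsMemoryFree(vaSpace, gpuVa);  // releases both the PA and the VA
//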
5149 void nvGpuOpsMemoryFree(struct gpuAddressSpace *vaSpace, NvU64 pointer) 5150 { 5151 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 5152 gpuMemDesc *memDesc = NULL; 5153 5154 NV_ASSERT(vaSpace); 5155 5156 portSyncRwLockAcquireWrite(vaSpace->allocationsLock); 5157 deleteDescriptor(&vaSpace->allocations, pointer, (void**)&memDesc); 5158 portSyncRwLockReleaseWrite(vaSpace->allocationsLock); 5159 5160 NV_ASSERT(memDesc); 5161 NV_ASSERT(memDesc->childHandle); 5162 NV_ASSERT(memDesc->handle); 5163 5164 // Free physical allocation 5165 pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->childHandle); 5166 5167 // Free virtual allocation 5168 pRmApi->Free(pRmApi, vaSpace->device->session->handle, memDesc->handle); 5169 5170 portMemFree(memDesc); 5171 } 5172 5173 5174 5175 NV_STATUS nvGpuOpsQueryCesCaps(struct gpuDevice *device, 5176 gpuCesCaps *cesCaps) 5177 { 5178 NV_STATUS status; 5179 nvGpuOpsLockSet acquiredLocks; 5180 THREAD_STATE_NODE threadState; 5181 5182 if (!device || !cesCaps) 5183 return NV_ERR_INVALID_ARGUMENT; 5184 5185 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 5186 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, 5187 device->session->handle, 5188 NULL, 5189 &acquiredLocks); 5190 if (status != NV_OK) 5191 { 5192 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5193 return status; 5194 } 5195 5196 // Refresh CE information, which may have changed if a GPU has been 5197 // initialized by RM for the first time 5198 status = queryCopyEngines(device, cesCaps); 5199 _nvGpuOpsLocksRelease(&acquiredLocks); 5200 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5201 return status; 5202 } 5203 5204 NV_STATUS nvGpuOpsQueryCaps(struct gpuDevice *device, gpuCaps *caps) 5205 { 5206 NV_STATUS status; 5207 nvGpuOpsLockSet acquiredLocks; 5208 THREAD_STATE_NODE threadState; 5209 OBJGPU *pGpu = NULL; 5210 KernelMemorySystem *pKernelMemorySystem; 5211 NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS infoParams = {0}; 5212 struct gpuSession *session = device->session; 5213 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 5214 5215 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 5216 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, device->session->handle, NULL, &acquiredLocks); 5217 if (status != NV_OK) 5218 { 5219 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5220 return status; 5221 } 5222 5223 caps->sysmemLink = device->sysmemLink; 5224 caps->sysmemLinkRateMBps = device->sysmemLinkRateMBps; 5225 caps->connectedToSwitch = device->connectedToSwitch; 5226 5227 infoParams.gpuId = device->gpuId; 5228 status = pRmApi->Control(pRmApi, 5229 session->handle, 5230 session->handle, 5231 NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2, 5232 &infoParams, 5233 sizeof(infoParams)); 5234 if (status != NV_OK) 5235 { 5236 _nvGpuOpsLocksRelease(&acquiredLocks); 5237 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5238 return status; 5239 } 5240 5241 if (infoParams.numaId != NV0000_CTRL_NO_NUMA_NODE) 5242 { 5243 caps->numaEnabled = NV_TRUE; 5244 caps->numaNodeId = infoParams.numaId; 5245 } 5246 5247 status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL); 5248 if (status != NV_OK) 5249 { 5250 _nvGpuOpsLocksRelease(&acquiredLocks); 5251 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5252 return status; 5253 } 5254 5255 pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 5256 if (!pKernelMemorySystem) 5257 { 5258 _nvGpuOpsLocksRelease(&acquiredLocks); 5259 threadStateFree(&threadState, 
THREAD_STATE_FLAGS_NONE);
5260 return NV_ERR_OBJECT_NOT_FOUND;
5261 }
5262
5263 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
5264 {
5265 caps->systemMemoryWindowStart = pKernelMemorySystem->coherentCpuFbBase;
5266 caps->systemMemoryWindowSize = pKernelMemorySystem->coherentCpuFbEnd -
5267 pKernelMemorySystem->coherentCpuFbBase;
5268 }
5269 else
5270 {
5271 caps->systemMemoryWindowStart = 0;
5272 caps->systemMemoryWindowSize = 0;
5273 }
5274
5275 if (device->connectedToSwitch)
5276 {
5277 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
5278 if (pKernelNvlink == NULL)
5279 {
5280 caps->nvswitchMemoryWindowStart = NVLINK_INVALID_FABRIC_ADDR;
5281 }
5282 else
5283 {
5284 caps->nvswitchMemoryWindowStart = knvlinkGetUniqueFabricBaseAddress(
5285 pGpu, pKernelNvlink);
5286 }
5287 }
5288
5289 _nvGpuOpsLocksRelease(&acquiredLocks);
5290 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
5291 return NV_OK;
5292 }
5293
5294 static NV_STATUS findVaspaceFromPid(unsigned pid, unsigned gpuId,
5295 NvHandle *hClient, NvHandle *hDevice,
5296 NvHandle *hSubdevice, NvHandle *hVaSpace)
5297 {
5298 //
5299 // This function iterates through all the vaspace objects under the clients
5300 // that match the pid argument, and returns any address space that is
5301 // tagged as UVM.
5302 //
5303 Device *pDevice = NULL;
5304 Subdevice *pSubDevice = NULL;
5305 OBJVASPACE *pVAS = NULL;
5306 OBJGPU *pGpu;
5307 unsigned hDeviceLocal = 0;
5308 unsigned hSubDeviceLocal = 0;
5309 NV_STATUS status;
5310 RmClient **ppClient;
5311 RmClient *pClient;
5312 RsClient *pRsClient;
5313
5314 for (ppClient = serverutilGetFirstClientUnderLock();
5315 ppClient;
5316 ppClient = serverutilGetNextClientUnderLock(ppClient))
5317 {
5318 pClient = *ppClient;
5319 pRsClient = staticCast(pClient, RsClient);
5320 if (pClient->ProcID == pid)
5321 {
5322 pGpu = gpumgrGetGpuFromId(gpuId);
5323 if (!pGpu)
5324 return NV_ERR_INVALID_ARGUMENT;
5325
5326 pSubDevice = CliGetSubDeviceInfoFromGpu(pRsClient->hClient,
5327 pGpu);
5328
5329 status = deviceGetByGpu(pRsClient, pGpu, NV_TRUE, &pDevice);
5330 if (status == NV_OK)
5331 {
5332 hDeviceLocal = RES_GET_HANDLE(pDevice);
5333
5334 if (pSubDevice != NULL)
5335 hSubDeviceLocal = RES_GET_HANDLE(pSubDevice);
5336
5337 *hClient = pRsClient->hClient;
5338 *hDevice = hDeviceLocal;
5339 *hSubdevice = hSubDeviceLocal;
5340
5341 if (pDevice->vaMode !=
5342 NV_DEVICE_ALLOCATION_VAMODE_MULTIPLE_VASPACES)
5343 {
5344 status = vaspaceGetByHandleOrDeviceDefault(pRsClient, hDeviceLocal, 0, &pVAS);
5345 if ((status != NV_OK) || (pVAS == NULL))
5346 return NV_ERR_GENERIC;
5347
5348 //
5349 // TODO: Bug 1632484:
5350 // Check to see if pVAS is UVM_MANAGED, once
5351 // that vaspace property is introduced.
5352 // No need to check FaultCapable.
5353 //
5354 if ((vaspaceIsMirrored(pVAS)) ||
5355 (vaspaceIsFaultCapable(pVAS)))
5356 {
5357 //
5358 // This means that this client is
5359 // using the vaspace associated with its device
5360 //
5361 *hVaSpace = 0;
5362 return NV_OK;
5363 }
5364 }
5365
5366 //
5367 // If the default vaspace is not tagged as UVM,
5368 // search all vaspace objects under
5369 // this client for this device to find the first
5370 // vaspace that is tagged as UVM.
5371 // 5372 if (findUvmAddressSpace(*hClient, pGpu->gpuInstance, hVaSpace, &pVAS) == NV_OK) 5373 { 5374 return NV_OK; 5375 } 5376 } 5377 } 5378 } 5379 return NV_ERR_GENERIC; 5380 } 5381 5382 // 5383 // This function will look through all the vaspaces under a client for a device and return 5384 // the one that is tagged as UVM, or NULL if there is no UVM vaspace. 5385 // 5386 static NV_STATUS findUvmAddressSpace(NvHandle hClient, NvU32 gpuInstance, NvHandle *phVaSpace, OBJVASPACE **ppVASpace) 5387 { 5388 RsResourceRef *pResourceRef; 5389 RS_ITERATOR iter; 5390 NvU32 gpuMask = NVBIT(gpuInstance); 5391 5392 iter = serverutilRefIter(hClient, NV01_NULL_OBJECT, classId(VaSpaceApi), RS_ITERATE_DESCENDANTS, NV_TRUE); 5393 5394 while (clientRefIterNext(iter.pClient, &iter)) 5395 { 5396 pResourceRef = iter.pResourceRef; 5397 5398 *ppVASpace = dynamicCast(pResourceRef->pResource, VaSpaceApi)->pVASpace; 5399 *phVaSpace = pResourceRef->hResource; 5400 5401 if ((vaspaceIsMirrored(*ppVASpace) || vaspaceIsExternallyOwned(*ppVASpace)) && 5402 (((*ppVASpace)->gpuMask & gpuMask) == gpuMask)) 5403 { 5404 return NV_OK; 5405 } 5406 } 5407 *phVaSpace = 0; 5408 *ppVASpace = NULL; 5409 return NV_ERR_INVALID_ARGUMENT; 5410 } 5411 5412 // Make sure UVM_GPU_NAME_LENGTH has the same length as 5413 // NV2080_GPU_MAX_NAME_STRING_LENGTH. 5414 ct_assert(NV2080_GPU_MAX_NAME_STRING_LENGTH == UVM_GPU_NAME_LENGTH); 5415 5416 static void getGpcTpcInfo(OBJGPU *pGpu, gpuInfo *pGpuInfo) 5417 { 5418 KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu); 5419 5420 pGpuInfo->maxTpcPerGpcCount = 0; 5421 pGpuInfo->maxGpcCount = 0; 5422 pGpuInfo->gpcCount = 0; 5423 pGpuInfo->tpcCount = 0; 5424 5425 NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsManager->legacyKgraphicsStaticInfo.bInitialized); 5426 NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo != NULL); 5427 5428 pGpuInfo->maxTpcPerGpcCount = 5429 pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_TPC_PER_GPC].data; 5430 pGpuInfo->maxGpcCount = 5431 pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_GPCS].data; 5432 pGpuInfo->gpcCount = 5433 nvPopCount32(pKernelGraphicsManager->legacyKgraphicsStaticInfo.floorsweepingMasks.gpcMask); 5434 5435 // 5436 // When MIG GPU partitioning is enabled, compute the upper bound on the number 5437 // of TPCs that may be available in this partition, to enable UVM to 5438 // conservatively size relevant data structures. 
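    // For example (hypothetical floorsweeping values): with gpcCount = 2 and
    // maxTpcPerGpcCount = 9, tpcCount below is reported as 2 * 9 = 18 even if
    // the partition's GPCs actually have fewer TPCs enabled.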
5439 // 5440 if (IS_MIG_IN_USE(pGpu)) 5441 { 5442 pGpuInfo->tpcCount = pGpuInfo->gpcCount * pGpuInfo->maxTpcPerGpcCount; 5443 } 5444 else 5445 { 5446 KernelGraphics *pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, 0); 5447 const KGRAPHICS_STATIC_INFO *pKernelGraphicsStaticInfo = kgraphicsGetStaticInfo(pGpu, pKernelGraphics); 5448 5449 NV_ASSERT_OR_RETURN_VOID(pKernelGraphicsStaticInfo != NULL); 5450 pGpuInfo->tpcCount = pKernelGraphicsStaticInfo->pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_SHADER_PIPE_SUB_COUNT].data; 5451 } 5452 } 5453 5454 static NV_STATUS queryVirtMode(NvHandle hClient, NvHandle hDevice, NvU32 *virtMode) 5455 { 5456 NV_STATUS status = NV_OK; 5457 *virtMode = UVM_VIRT_MODE_NONE; 5458 return status; 5459 } 5460 5461 NV_STATUS nvGpuOpsGetGpuInfo(const NvProcessorUuid *pUuid, 5462 const gpuClientInfo *pGpuClientInfo, 5463 gpuInfo *pGpuInfo) 5464 { 5465 NV_STATUS status; 5466 NV0080_ALLOC_PARAMETERS nv0080AllocParams = {0}; 5467 NV2080_ALLOC_PARAMETERS nv2080AllocParams = {0}; 5468 NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}}; 5469 NV2080_CTRL_MC_GET_ARCH_INFO_PARAMS archInfoParams = {0}; 5470 NV2080_CTRL_GPU_GET_NAME_STRING_PARAMS gpuNameParams = {0}; 5471 NvHandle clientHandle = 0; 5472 NvHandle deviceHandle = 1; 5473 NvHandle subDeviceHandle = 2; 5474 NvBool isClientAllocated = NV_FALSE; 5475 NvBool isDeviceAllocated = NV_FALSE; 5476 NvBool isSubdeviceAllocated = NV_FALSE; 5477 NV0080_CTRL_GPU_GET_NUM_SUBDEVICES_PARAMS subDevParams = { 0 }; 5478 NV2080_CTRL_GPU_GET_SIMULATION_INFO_PARAMS simulationInfoParams = {0}; 5479 OBJGPU *pGpu = NULL; 5480 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 5481 NvU32 dummy; 5482 5483 pGpu = gpumgrGetGpuFromUuid(pUuid->uuid, 5484 DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE, _SHA1) | 5485 DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT, _BINARY)); 5486 if (!pGpu) 5487 { 5488 return NV_ERR_GPU_UUID_NOT_FOUND; 5489 } 5490 5491 if (!osIsGpuAccessible(pGpu)) 5492 { 5493 return NV_ERR_INSUFFICIENT_PERMISSIONS; 5494 } 5495 5496 status = nvGpuOpsCreateClient(pRmApi, &clientHandle); 5497 if (status != NV_OK) 5498 { 5499 return status; 5500 } 5501 5502 isClientAllocated = NV_TRUE; 5503 5504 portMemCopy(&gpuIdInfoParams.gpuUuid, sizeof(*pUuid), pUuid, sizeof(*pUuid)); 5505 5506 gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY; 5507 status = pRmApi->Control(pRmApi, 5508 clientHandle, 5509 clientHandle, 5510 NV0000_CTRL_CMD_GPU_GET_UUID_INFO, 5511 &gpuIdInfoParams, 5512 sizeof(gpuIdInfoParams)); 5513 if (NV_OK != status) 5514 goto cleanup; 5515 5516 nv0080AllocParams.deviceId = gpuIdInfoParams.deviceInstance; 5517 5518 status = pRmApi->Alloc(pRmApi, 5519 clientHandle, 5520 clientHandle, 5521 &deviceHandle, 5522 NV01_DEVICE_0, 5523 &nv0080AllocParams); 5524 if (NV_OK != status) 5525 goto cleanup; 5526 5527 isDeviceAllocated = NV_TRUE; 5528 5529 nv2080AllocParams.subDeviceId = gpuIdInfoParams.subdeviceInstance; 5530 status = pRmApi->Alloc(pRmApi, 5531 clientHandle, 5532 deviceHandle, 5533 &subDeviceHandle, 5534 NV20_SUBDEVICE_0, 5535 &nv2080AllocParams); 5536 if (NV_OK != status) 5537 goto cleanup; 5538 5539 isSubdeviceAllocated = NV_TRUE; 5540 5541 portMemCopy(&pGpuInfo->uuid, sizeof(*pUuid), pUuid, sizeof(*pUuid)); 5542 5543 status = pRmApi->Control(pRmApi, 5544 clientHandle, 5545 subDeviceHandle, 5546 NV2080_CTRL_CMD_MC_GET_ARCH_INFO, 5547 &archInfoParams, 5548 sizeof(archInfoParams)); 5549 if (NV_OK != status) 5550 goto cleanup; 5551 5552 pGpuInfo->gpuArch = archInfoParams.architecture; 
5553 pGpuInfo->gpuImplementation = archInfoParams.implementation; 5554 5555 gpuNameParams.gpuNameStringFlags = NV2080_CTRL_GPU_GET_NAME_STRING_FLAGS_TYPE_ASCII; 5556 status = pRmApi->Control(pRmApi, 5557 clientHandle, 5558 subDeviceHandle, 5559 NV2080_CTRL_CMD_GPU_GET_NAME_STRING, 5560 &gpuNameParams, 5561 sizeof(gpuNameParams)); 5562 if (NV_OK != status) 5563 goto cleanup; 5564 5565 portStringCopy(pGpuInfo->name, sizeof(pGpuInfo->name), 5566 (const char *)gpuNameParams.gpuNameString.ascii, 5567 sizeof(gpuNameParams.gpuNameString.ascii)); 5568 5569 status = queryVirtMode(clientHandle, deviceHandle, &pGpuInfo->virtMode); 5570 if (status != NV_OK) 5571 goto cleanup; 5572 5573 pGpuInfo->gpuInTcc = NV_FALSE; 5574 5575 status = findDeviceClasses(clientHandle, 5576 deviceHandle, 5577 subDeviceHandle, 5578 &pGpuInfo->hostClass, 5579 &pGpuInfo->ceClass, 5580 &pGpuInfo->computeClass, 5581 &dummy, 5582 &dummy, 5583 &dummy); 5584 if (status != NV_OK) 5585 goto cleanup; 5586 5587 status = pRmApi->Control(pRmApi, 5588 clientHandle, 5589 deviceHandle, 5590 NV0080_CTRL_CMD_GPU_GET_NUM_SUBDEVICES, 5591 &subDevParams, 5592 sizeof(subDevParams)); 5593 if (status != NV_OK) 5594 goto cleanup; 5595 5596 pGpuInfo->subdeviceCount = subDevParams.numSubDevices; 5597 5598 getGpcTpcInfo(pGpu, pGpuInfo); 5599 5600 if (IS_MIG_IN_USE(pGpu)) 5601 { 5602 NvU32 swizzId; 5603 5604 NV_ASSERT(pGpuInfo->subdeviceCount == 1); 5605 5606 status = getSwizzIdFromUserSmcPartHandle(pRmApi, 5607 clientHandle, 5608 deviceHandle, 5609 pGpuClientInfo->hClient, 5610 pGpuClientInfo->hSmcPartRef, 5611 &swizzId); 5612 if (status != NV_OK) 5613 goto cleanup; 5614 5615 pGpuInfo->smcEnabled = NV_TRUE; 5616 pGpuInfo->smcSwizzId = swizzId; 5617 pGpuInfo->smcUserClientInfo.hClient = pGpuClientInfo->hClient; 5618 pGpuInfo->smcUserClientInfo.hSmcPartRef = pGpuClientInfo->hSmcPartRef; 5619 } 5620 5621 status = pRmApi->Control(pRmApi, 5622 clientHandle, 5623 subDeviceHandle, 5624 NV2080_CTRL_CMD_GPU_GET_SIMULATION_INFO, 5625 &simulationInfoParams, 5626 sizeof(simulationInfoParams)); 5627 if (status != NV_OK) 5628 goto cleanup; 5629 5630 pGpuInfo->isSimulated = (simulationInfoParams.type != NV2080_CTRL_GPU_GET_SIMULATION_INFO_TYPE_NONE); 5631 5632 cleanup: 5633 if (isSubdeviceAllocated) 5634 pRmApi->Free(pRmApi, clientHandle, subDeviceHandle); 5635 5636 if (isDeviceAllocated) 5637 pRmApi->Free(pRmApi, clientHandle, deviceHandle); 5638 5639 if (isClientAllocated) 5640 pRmApi->Free(pRmApi, clientHandle, clientHandle); 5641 5642 return status; 5643 } 5644 5645 NV_STATUS nvGpuOpsGetGpuIds(const NvU8 *pUuid, 5646 unsigned uuidLength, 5647 NvU32 *pDeviceId, 5648 NvU32 *pSubdeviceId) 5649 { 5650 NV_STATUS nvStatus; 5651 nvGpuOpsLockSet acquiredLocks; 5652 THREAD_STATE_NODE threadState; 5653 NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}}; 5654 NvHandle clientHandle = 0; 5655 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 5656 5657 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 5658 nvStatus = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks); 5659 if (nvStatus != NV_OK) 5660 { 5661 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5662 return nvStatus; 5663 } 5664 5665 nvStatus = nvGpuOpsCreateClient(pRmApi, &clientHandle); 5666 if (nvStatus != NV_OK) 5667 { 5668 _nvGpuOpsLocksRelease(&acquiredLocks); 5669 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5670 return nvStatus; 5671 } 5672 5673 portMemCopy(&gpuIdInfoParams.gpuUuid, uuidLength, pUuid, uuidLength); 5674 5675 
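    // NV0000_CTRL_CMD_GPU_GET_UUID_INFO resolves the binary UUID into the
    // device and subdevice instance numbers returned to the caller below.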
gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY;
    nvStatus = pRmApi->Control(pRmApi,
                               clientHandle,
                               clientHandle,
                               NV0000_CTRL_CMD_GPU_GET_UUID_INFO,
                               &gpuIdInfoParams,
                               sizeof(gpuIdInfoParams));
    if (NV_OK == nvStatus)
    {
        *pDeviceId = gpuIdInfoParams.deviceInstance;
        *pSubdeviceId = gpuIdInfoParams.subdeviceInstance;
    }

    pRmApi->Free(pRmApi, clientHandle, clientHandle);

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return nvStatus;
}

NV_STATUS nvGpuOpsServiceDeviceInterruptsRM(struct gpuDevice *device)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV2080_CTRL_MC_SERVICE_INTERRUPTS_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle, NULL, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    params.engines = NV2080_CTRL_MC_ENGINE_ID_ALL;
    status = pRmApi->Control(pRmApi,
                             device->session->handle,
                             device->subhandle,
                             NV2080_CTRL_CMD_MC_SERVICE_INTERRUPTS,
                             &params,
                             sizeof(params));

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsCheckEccErrorSlowpath(struct gpuChannel *channel,
                                        NvBool *bEccDbeSet)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV2080_CTRL_GPU_QUERY_ECC_STATUS_PARAMS eccStatus;
    NvU32 i = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!channel || !bEccDbeSet)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      channel->vaSpace->device->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    *bEccDbeSet = NV_FALSE;

    // Do anything only if ECC is enabled on this device
    if (channel->vaSpace->device->rmSubDevice->bEccEnabled)
    {
        portMemSet(&eccStatus, 0, sizeof(eccStatus));

        status = pRmApi->Control(pRmApi,
                                 channel->vaSpace->device->session->handle,
                                 channel->vaSpace->device->subhandle,
                                 NV2080_CTRL_CMD_GPU_QUERY_ECC_STATUS,
                                 &eccStatus,
                                 sizeof(eccStatus));
        if (status != NV_OK)
        {
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return NV_ERR_GENERIC;
        }

        for (i = 0; i < NV2080_CTRL_GPU_ECC_UNIT_COUNT; i++)
        {
            if (eccStatus.units[i].dbe.count != 0)
            {
                *bEccDbeSet = NV_TRUE;
            }
        }
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

static NV_STATUS nvGpuOpsFillGpuMemoryInfo(PMEMORY_DESCRIPTOR pMemDesc,
                                           OBJGPU *pMappingGpu,
                                           gpuMemoryInfo *pGpuMemoryInfo)
{
    NV_STATUS status;
    PMEMORY_DESCRIPTOR pRootMemDesc = memdescGetRootMemDesc(pMemDesc, NULL);
    OBJGPU *pGpu = (pMemDesc->pGpu == NULL) ? pMappingGpu : pMemDesc->pGpu;

    status = nvGpuOpsMemGetPageSize(pMappingGpu,
                                    pMemDesc,
                                    &pGpuMemoryInfo->pageSize);
    if (status != NV_OK)
        return status;

    pGpuMemoryInfo->size = memdescGetSize(pMemDesc);

    pGpuMemoryInfo->contig = memdescGetContiguity(pMemDesc, AT_GPU);

    if (pGpuMemoryInfo->contig)
    {
        GMMU_APERTURE aperture = nvGpuOpsGetExternalAllocAperture(pMemDesc, NV_FALSE, NV_FALSE);
        NvU64 physAddr;

        memdescGetPhysAddrsForGpu(pMemDesc, pMappingGpu, AT_GPU, 0, 0, 1, &physAddr);

        pGpuMemoryInfo->physAddr =
            kgmmuEncodePhysAddr(GPU_GET_KERNEL_GMMU(pGpu), aperture, physAddr, NVLINK_INVALID_FABRIC_ADDR);
    }

    pGpuMemoryInfo->kind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);

    pGpuMemoryInfo->sysmem = (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM);

    pGpuMemoryInfo->deviceDescendant = pRootMemDesc->pGpu != NULL;

    if (pGpuMemoryInfo->deviceDescendant)
    {
        NvU8 *uuid;
        NvU32 uuidLength, flags;
        NV_STATUS status;
        flags = DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE, _SHA1) |
                DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT, _BINARY);

        // on success, allocates memory for uuid
        status = gpuGetGidInfo(pGpu, &uuid, &uuidLength, flags);
        if (status != NV_OK)
            return status;

        portMemCopy(&pGpuMemoryInfo->uuid, uuidLength, uuid, uuidLength);
        portMemFree(uuid);
    }

    return NV_OK;
}

static NvBool memdescIsSysmem(PMEMORY_DESCRIPTOR pMemDesc)
{
    return memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM;
}

static NV_STATUS dupMemory(struct gpuDevice *device,
                           NvHandle hClient,
                           NvHandle hPhysMemory,
                           NvU32 flags,
                           NvHandle *hDupMemory,
                           gpuMemoryInfo *pGpuMemoryInfo)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NvHandle dupedMemHandle;
    Memory *pMemory = NULL;
    PMEMORY_DESCRIPTOR pMemDesc = NULL;
    MEMORY_DESCRIPTOR *pAdjustedMemDesc = NULL;
    FABRIC_VASPACE *pFabricVAS = NULL;
    OBJGPU *pMappingGpu;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    RsResourceRef *pResourceRef;
    RsResourceRef *pParentRef;
    struct gpuSession *session;
    NvHandle hParent;
    NvHandle hSubDevice;
    NvBool bIsIndirectPeer = NV_FALSE;

    if (!device || !hDupMemory)
        return NV_ERR_INVALID_ARGUMENT;

    NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE));

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    // RS-TODO use dual client locking
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = CliSetSubDeviceContext(device->session->handle,
                                    device->subhandle,
                                    &hSubDevice,
                                    &pMappingGpu);

    if (status != NV_OK)
        goto done;

    // Get all the necessary information about the memory
    status = nvGpuOpsGetMemoryByHandle(hClient,
                                       hPhysMemory,
                                       &pMemory);
    if (status != NV_OK)
        goto done;

    // RM client allocations can't have multiple memdescs.
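    // (The assert below checks this: no per-subdevice memdescs are expected
    // on a client-allocated Memory object.)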
    pMemDesc = pMemory->pMemDesc;
    NV_ASSERT(!memdescHasSubDeviceMemDescs(pMemDesc));

    pAdjustedMemDesc = pMemDesc;
    pFabricVAS = dynamicCast(pMappingGpu->pFabricVAS, FABRIC_VASPACE);
    if (pFabricVAS != NULL)
    {
        status = fabricvaspaceGetGpaMemdesc(pFabricVAS, pMemDesc, pMappingGpu, &pAdjustedMemDesc);
        if (status != NV_OK)
            goto done;
    }

    if (memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FBMEM &&
        memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_SYSMEM &&
        memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FABRIC_MC &&
        memdescGetAddressSpace(pAdjustedMemDesc) != ADDR_FABRIC_V2)
    {
        status = NV_ERR_NOT_SUPPORTED;
        goto freeGpaMemdesc;
    }

    // For SYSMEM or indirect peer mappings
    bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu);
    if (bIsIndirectPeer ||
        memdescIsSysmem(pAdjustedMemDesc))
    {
        // For sysmem allocations, the dup done below is very shallow and in
        // particular doesn't create IOMMU mappings required for the mapped GPU
        // to access the memory. That's a problem if the mapped GPU is different
        // from the GPU that the allocation was created under. Add them
        // explicitly here and remove them when the memory is freed in
        // nvGpuOpsFreeDupedHandle(). Notably memdescMapIommu() refcounts the
        // mappings so it's ok to call it if the mappings are already there.
        //
        // TODO: Bug 1811060: Add native support for this use-case in RM API.
        status = memdescMapIommu(pAdjustedMemDesc, pMappingGpu->busInfo.iovaspaceId);
        if (status != NV_OK)
            goto freeGpaMemdesc;
    }

    session = device->session;

    if (pGpuMemoryInfo)
    {
        RsClient *pClient;
        status = serverGetClientUnderLock(&g_resServ, session->handle, &pClient);
        if (status != NV_OK)
            goto freeGpaMemdesc;

        status = nvGpuOpsFillGpuMemoryInfo(pAdjustedMemDesc, pMappingGpu, pGpuMemoryInfo);
        if (status != NV_OK)
            goto freeGpaMemdesc;
    }

    pResourceRef = RES_GET_REF(pMemory);
    pParentRef = pResourceRef->pParentRef;

    // TODO: Bug 2479851: temporarily detect the type of the parent of the
    // memory object (device or subdevice). Once CUDA switches to subdevices,
    // we will use subdevice handles unconditionally here.
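    //
    // Parent-type to dup-parent mapping used below:
    //   Subdevice-parented memory        -> duped under device->subhandle
    //   Client-parented (fabric) memory  -> duped under session->handle
    //   Device-parented memory           -> duped under device->handle
    //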
5959 if (dynamicCast(pParentRef->pResource, Subdevice)) 5960 { 5961 hParent = device->subhandle; 5962 } 5963 else if (dynamicCast(pParentRef->pResource, RsClientResource)) 5964 { 5965 NvBool bAssert = ( 5966 (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_MC) || 5967 (memdescGetAddressSpace(pAdjustedMemDesc) == ADDR_FABRIC_V2)); 5968 5969 NV_ASSERT(bAssert); 5970 5971 hParent = session->handle; 5972 } 5973 else 5974 { 5975 NV_ASSERT(dynamicCast(pParentRef->pResource, Device)); 5976 hParent = device->handle; 5977 } 5978 5979 dupedMemHandle = NV01_NULL_OBJECT; 5980 status = pRmApi->DupObject(pRmApi, 5981 session->handle, 5982 hParent, 5983 &dupedMemHandle, 5984 hClient, 5985 hPhysMemory, 5986 flags); 5987 if (status != NV_OK) 5988 goto freeGpaMemdesc; 5989 5990 *hDupMemory = dupedMemHandle; 5991 5992 freeGpaMemdesc: 5993 if (pAdjustedMemDesc != pMemDesc) 5994 fabricvaspacePutGpaMemdesc(pFabricVAS, pAdjustedMemDesc); 5995 5996 done: 5997 _nvGpuOpsLocksRelease(&acquiredLocks); 5998 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 5999 return status; 6000 } 6001 6002 NV_STATUS nvGpuOpsDupMemory(struct gpuDevice *device, 6003 NvHandle hClient, 6004 NvHandle hPhysMemory, 6005 NvHandle *hDupMemory, 6006 gpuMemoryInfo *pGpuMemoryInfo) 6007 { 6008 return dupMemory(device, 6009 hClient, 6010 hPhysMemory, 6011 NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE, 6012 hDupMemory, 6013 pGpuMemoryInfo); 6014 } 6015 6016 NV_STATUS nvGpuOpsDupAllocation(struct gpuAddressSpace *srcVaSpace, 6017 NvU64 srcAddress, 6018 struct gpuAddressSpace *dstVaSpace, 6019 NvU64 dstVaAlignment, 6020 NvU64 *dstAddress) 6021 { 6022 NV_STATUS status; 6023 NvHandle dstPhysHandle = 0; 6024 NvHandle srcPhysHandle = 0; 6025 NvU64 tmpDstAddress = 0; 6026 gpuMemoryInfo gpuMemoryInfo = {0}; 6027 gpuVaAllocInfo allocInfo = {0}; 6028 struct allocFlags flags = {0}; 6029 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 6030 6031 NV_CHECK_OR_RETURN(LEVEL_ERROR, srcVaSpace != 0, NV_ERR_INVALID_ARGUMENT); 6032 NV_CHECK_OR_RETURN(LEVEL_ERROR, dstVaSpace != 0, NV_ERR_INVALID_ARGUMENT); 6033 6034 NV_CHECK_OR_RETURN(LEVEL_ERROR, srcVaSpace != dstVaSpace, NV_ERR_INVALID_ARGUMENT); 6035 NV_CHECK_OR_RETURN(LEVEL_ERROR, srcAddress != 0, NV_ERR_INVALID_ARGUMENT); 6036 NV_CHECK_OR_RETURN(LEVEL_ERROR, dstAddress != NULL, NV_ERR_INVALID_ARGUMENT); 6037 6038 // If the given combination of source VA space and address does not 6039 // correspond to a previous allocation, the physical handle retrieval fails 6040 status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &srcPhysHandle); 6041 if (status != NV_OK) 6042 return status; 6043 6044 // Dupe the physical allocation, and return information about the associated 6045 // memory descriptor 6046 // 6047 // Passing NV04_DUP_HANDLE_FLAGS_NONE allows duping across MIG partitions 6048 status = dupMemory(dstVaSpace->device, 6049 srcVaSpace->device->session->handle, 6050 srcPhysHandle, 6051 NV04_DUP_HANDLE_FLAGS_NONE, 6052 &dstPhysHandle, 6053 &gpuMemoryInfo); 6054 6055 if (status != NV_OK) 6056 return status; 6057 6058 // Vidmem dups across GPUs are not currently supported 6059 if (!gpuMemoryInfo.sysmem && (srcVaSpace->device != dstVaSpace->device)) 6060 { 6061 status = NV_ERR_NOT_SUPPORTED; 6062 goto cleanup_dup; 6063 } 6064 6065 // The virtual allocation and mapping use the size, page size, and alignment 6066 // of the destination memory descriptor. 
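    // (gpuMemoryInfo describes the duped copy of the source allocation, so
    // the new virtual range inherits the source's size and page size; only
    // the alignment is caller-specified.)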
6067 allocInfo.pageSize = gpuMemoryInfo.pageSize; 6068 allocInfo.alignment = dstVaAlignment; 6069 6070 status = nvGpuOpsAllocVirtual(dstVaSpace, 6071 gpuMemoryInfo.size, 6072 dstAddress, 6073 dstPhysHandle, 6074 flags, 6075 &allocInfo); 6076 if (status != NV_OK) 6077 goto cleanup_dup; 6078 6079 // Map the entire memory 6080 status = nvGpuOpsMapGpuMemory(dstVaSpace, 6081 *dstAddress, 6082 gpuMemoryInfo.size, 6083 gpuMemoryInfo.pageSize, 6084 &tmpDstAddress, 6085 flags); 6086 6087 if (status != NV_OK) 6088 goto cleanup_virt_allocation; 6089 6090 NV_ASSERT(tmpDstAddress == *dstAddress); 6091 6092 return NV_OK; 6093 6094 cleanup_virt_allocation: 6095 nvGpuOpsFreeVirtual(dstVaSpace, *dstAddress); 6096 6097 cleanup_dup: 6098 pRmApi->Free(pRmApi, dstVaSpace->device->session->handle, dstPhysHandle); 6099 return status; 6100 } 6101 6102 NV_STATUS nvGpuOpsGetGuid(NvHandle hClient, NvHandle hDevice, 6103 NvHandle hSubDevice, NvU8 *gpuGuid, 6104 unsigned guidLength) 6105 { 6106 NV_STATUS status; 6107 nvGpuOpsLockSet acquiredLocks; 6108 THREAD_STATE_NODE threadState; 6109 NV2080_CTRL_GPU_GET_GID_INFO_PARAMS getGidParams = {0}; 6110 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 6111 6112 if (!gpuGuid) 6113 return NV_ERR_INVALID_ARGUMENT; 6114 6115 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 6116 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, hClient, NULL, &acquiredLocks); 6117 if (status != NV_OK) 6118 { 6119 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6120 return status; 6121 } 6122 6123 getGidParams.index = 0; 6124 getGidParams.flags = NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY; 6125 status = pRmApi->Control(pRmApi, 6126 hClient, hSubDevice, 6127 NV2080_CTRL_CMD_GPU_GET_GID_INFO, 6128 &getGidParams, 6129 sizeof(getGidParams)); 6130 6131 if ((guidLength != getGidParams.length) || (status != NV_OK)) 6132 { 6133 _nvGpuOpsLocksRelease(&acquiredLocks); 6134 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6135 return NV_ERR_INVALID_ARGUMENT; 6136 } 6137 6138 portMemCopy(gpuGuid, guidLength, &getGidParams.data, guidLength); 6139 6140 _nvGpuOpsLocksRelease(&acquiredLocks); 6141 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6142 return status; 6143 } 6144 6145 // Make sure UVM_COPY_ENGINE_COUNT_MAX is at least the number of copy engines 6146 // supported by RM. 
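// (ct_assert() turns a violation into a compile-time error, so the UVM and
// RM headers cannot silently diverge on the copy engine count.)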
6147 ct_assert(UVM_COPY_ENGINE_COUNT_MAX >= NV2080_ENGINE_TYPE_COPY_SIZE); 6148 6149 static void setCeCaps(const NvU8 *rmCeCaps, gpuCeCaps *ceCaps) 6150 { 6151 ceCaps->grce = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_GRCE); 6152 ceCaps->shared = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SHARED); 6153 ceCaps->sysmemRead = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM_READ); 6154 ceCaps->sysmemWrite = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM_WRITE); 6155 ceCaps->nvlinkP2p = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_NVLINK_P2P); 6156 ceCaps->sysmem = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM); 6157 ceCaps->p2p = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_P2P); 6158 } 6159 6160 static NV_STATUS queryCopyEngines(struct gpuDevice *gpu, gpuCesCaps *cesCaps) 6161 { 6162 NV_STATUS status = NV_OK; 6163 NV2080_CTRL_GPU_GET_ENGINES_PARAMS getEnginesParams = {0}; 6164 NvU32 *engineList; 6165 NvU32 i; 6166 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 6167 6168 status = pRmApi->Control(pRmApi, 6169 gpu->session->handle, 6170 gpu->subhandle, 6171 NV2080_CTRL_CMD_GPU_GET_ENGINES, 6172 &getEnginesParams, 6173 sizeof(getEnginesParams)); 6174 if (status != NV_OK) 6175 return status; 6176 6177 engineList = portMemAllocNonPaged( 6178 sizeof(*engineList) * getEnginesParams.engineCount); 6179 if (engineList == NULL) 6180 return NV_ERR_NO_MEMORY; 6181 6182 getEnginesParams.engineList = NV_PTR_TO_NvP64(engineList); 6183 6184 status = pRmApi->Control(pRmApi, 6185 gpu->session->handle, 6186 gpu->subhandle, 6187 NV2080_CTRL_CMD_GPU_GET_ENGINES, 6188 &getEnginesParams, 6189 sizeof(getEnginesParams)); 6190 if (status != NV_OK) 6191 goto done; 6192 6193 portMemSet(cesCaps, 0, sizeof(*cesCaps)); 6194 6195 for (i = 0; i < getEnginesParams.engineCount; i++) 6196 { 6197 NV2080_CTRL_CE_GET_CAPS_PARAMS ceParams = {0}; 6198 NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS pceMaskParams = {0}; 6199 NvU8 rmCeCaps[NV2080_CTRL_CE_CAPS_TBL_SIZE] = {0}; 6200 UvmGpuCopyEngineCaps *ceCaps; 6201 NvU32 ceIndex; 6202 6203 if (!NV2080_ENGINE_TYPE_IS_COPY(engineList[i])) 6204 continue; 6205 6206 ceIndex = NV2080_ENGINE_TYPE_COPY_IDX(engineList[i]); 6207 if (ceIndex >= NV2080_ENGINE_TYPE_COPY_SIZE) 6208 continue; 6209 6210 ceParams.ceEngineType = NV2080_ENGINE_TYPE_COPY(ceIndex); 6211 ceParams.capsTblSize = NV2080_CTRL_CE_CAPS_TBL_SIZE; 6212 ceParams.capsTbl = NV_PTR_TO_NvP64(rmCeCaps); 6213 6214 status = pRmApi->Control(pRmApi, 6215 gpu->session->handle, 6216 gpu->subhandle, 6217 NV2080_CTRL_CMD_CE_GET_CAPS, 6218 &ceParams, 6219 sizeof(ceParams)); 6220 if (status != NV_OK) 6221 { 6222 NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__, 6223 __LINE__, nvstatusToString(status)); 6224 goto done; 6225 } 6226 6227 ceCaps = cesCaps->copyEngineCaps + ceIndex; 6228 setCeCaps(rmCeCaps, ceCaps); 6229 6230 pceMaskParams.ceEngineType = NV2080_ENGINE_TYPE_COPY(ceIndex); 6231 pceMaskParams.pceMask = 0; 6232 status = pRmApi->Control(pRmApi, 6233 gpu->session->handle, 6234 gpu->subhandle, 6235 NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK, 6236 &pceMaskParams, 6237 sizeof(pceMaskParams)); 6238 if (status != NV_OK) 6239 { 6240 NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__, 6241 __LINE__, nvstatusToString(status)); 6242 goto done; 6243 } 6244 ceCaps->cePceMask = pceMaskParams.pceMask; 6245 6246 ceCaps->supported = NV_TRUE; 6247 } 6248 6249 done: 6250 portMemFree(engineList); 6251 return status; 6252 } 6253 6254 static NvBool 
isClassHost(NvU32 class)
{
    NvBool bHostClass = NV_FALSE;
    CLI_CHANNEL_CLASS_INFO classInfo;
    CliGetChannelClassInfo(class, &classInfo);
    bHostClass = (classInfo.classType == CHANNEL_CLASS_TYPE_GPFIFO);
    return bHostClass;
}

static NvBool isClassCE(NvU32 class)
{
    switch (class)
    {
        case MAXWELL_DMA_COPY_A:
        case PASCAL_DMA_COPY_A:
        case PASCAL_DMA_COPY_B:
        case VOLTA_DMA_COPY_A:
        case TURING_DMA_COPY_A:
        case AMPERE_DMA_COPY_A:
        case AMPERE_DMA_COPY_B:
        case HOPPER_DMA_COPY_A:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NvBool isClassSec2(NvU32 class)
{
    switch (class)
    {

        default:
            return NV_FALSE;
    }
}

static NvBool isClassCompute(NvU32 class)
{
    switch (class)
    {
        case MAXWELL_COMPUTE_A:
        case MAXWELL_COMPUTE_B:
        case PASCAL_COMPUTE_A:
        case PASCAL_COMPUTE_B:
        case VOLTA_COMPUTE_A:
        case VOLTA_COMPUTE_B:
        case TURING_COMPUTE_A:
        case AMPERE_COMPUTE_A:
        case AMPERE_COMPUTE_B:
        case HOPPER_COMPUTE_A:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NvBool isClassFaultBuffer(NvU32 class)
{
    switch (class)
    {
        case MAXWELL_FAULT_BUFFER_A:
        case MMU_FAULT_BUFFER:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NvBool isClassAccessCounterBuffer(NvU32 class)
{
    switch (class)
    {
        case ACCESS_COUNTER_NOTIFY_BUFFER:
            return NV_TRUE;

        default:
            return NV_FALSE;
    }
}

static NV_STATUS findDeviceClasses(NvHandle hRoot,
                                   NvHandle hDevice,
                                   NvHandle hSubdevice,
                                   NvU32 *hostClass,
                                   NvU32 *ceClass,
                                   NvU32 *computeClass,
                                   NvU32 *faultBufferClass,
                                   NvU32 *accessCounterBufferClass,
                                   NvU32 *sec2Class)
{
    NvU32 *classList;
    NV_STATUS status = NV_OK;
    NV0080_CTRL_GPU_GET_CLASSLIST_PARAMS classParams = {0};
    NvU32 i = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    *hostClass = 0;
    *ceClass = 0;
    *computeClass = 0;
    *faultBufferClass = 0;
    *accessCounterBufferClass = 0;
    *sec2Class = 0;

    status = pRmApi->Control(pRmApi,
                             hRoot,
                             hDevice,
                             NV0080_CTRL_CMD_GPU_GET_CLASSLIST,
                             &classParams,
                             sizeof(classParams));
    if (status != NV_OK)
        return status;

    classList = portMemAllocNonPaged(
        (sizeof(NvU32) * classParams.numClasses));

    if (classList == NULL)
    {
        return NV_ERR_INSUFFICIENT_RESOURCES;
    }

    classParams.classList = NV_PTR_TO_NvP64(classList);
    status = pRmApi->Control(pRmApi,
                             hRoot,
                             hDevice,
                             NV0080_CTRL_CMD_GPU_GET_CLASSLIST,
                             &classParams,
                             sizeof(classParams));

    if (status != NV_OK)
        goto Cleanup_classlist;

    for (i = 0; i < classParams.numClasses; i++)
    {
        if (classList[i] == PHYSICAL_CHANNEL_GPFIFO)
            continue;
        if (isClassHost(classList[i]))
            *hostClass = NV_MAX(*hostClass, classList[i]);
        else if (isClassCE(classList[i]))
            *ceClass = NV_MAX(*ceClass, classList[i]);
        else if (isClassCompute(classList[i]))
            *computeClass = NV_MAX(*computeClass, classList[i]);
        else if (isClassFaultBuffer(classList[i]))
            *faultBufferClass = NV_MAX(*faultBufferClass, classList[i]);
        else if (isClassAccessCounterBuffer(classList[i]))
        {
            NV_ASSERT(accessCounterBufferClass);
6403 *accessCounterBufferClass = NV_MAX(*accessCounterBufferClass, classList[i]); 6404 } 6405 else if (isClassSec2(classList[i])) 6406 *sec2Class = NV_MAX(*sec2Class, classList[i]); 6407 } 6408 6409 Cleanup_classlist: 6410 portMemFree(classList); 6411 return status; 6412 } 6413 6414 NV_STATUS nvGpuOpsGetClientInfoFromPid(unsigned pid, 6415 const NvU8 *gpuUuid, 6416 NvHandle *hClient, 6417 NvHandle *hDevice, 6418 NvHandle *hSubDevice) 6419 { 6420 NV0000_CTRL_GPU_GET_UUID_INFO_PARAMS gpuIdInfoParams = {{0}}; 6421 unsigned gpuId = 0; 6422 NvHandle hPidClient = 0; 6423 NvHandle hPidDevice = 0; 6424 NvHandle hPidVaSpace = 0; 6425 NvHandle hPidSubDevice = 0; 6426 NvHandle clientHandle = 0; 6427 NV_STATUS status; 6428 nvGpuOpsLockSet acquiredLocks; 6429 THREAD_STATE_NODE threadState; 6430 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 6431 6432 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 6433 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, NV01_NULL_OBJECT, NULL, &acquiredLocks); 6434 if (status != NV_OK) 6435 { 6436 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6437 return status; 6438 } 6439 6440 status = nvGpuOpsCreateClient(pRmApi, &clientHandle); 6441 if (status != NV_OK) 6442 { 6443 _nvGpuOpsLocksRelease(&acquiredLocks); 6444 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6445 return status; 6446 } 6447 6448 // find the gpuId from the given uuid 6449 portMemCopy(&gpuIdInfoParams.gpuUuid, NV_GPU_UUID_LEN, gpuUuid, NV_GPU_UUID_LEN); 6450 gpuIdInfoParams.flags = NV0000_CTRL_CMD_GPU_GET_UUID_INFO_FLAGS_FORMAT_BINARY; 6451 status = pRmApi->Control(pRmApi, 6452 clientHandle, 6453 clientHandle, 6454 NV0000_CTRL_CMD_GPU_GET_UUID_INFO, 6455 &gpuIdInfoParams, 6456 sizeof(gpuIdInfoParams)); 6457 if (status != NV_OK) 6458 goto cleanup; 6459 6460 gpuId = gpuIdInfoParams.gpuId; 6461 6462 status = findVaspaceFromPid(pid, gpuId, &hPidClient, 6463 &hPidDevice, &hPidSubDevice, &hPidVaSpace); 6464 6465 // free the session we just created 6466 pRmApi->Free(pRmApi, clientHandle, clientHandle); 6467 if (status != NV_OK) 6468 goto cleanup; 6469 6470 *hClient = hPidClient; 6471 *hDevice = hPidDevice; 6472 *hSubDevice = hPidSubDevice; 6473 _nvGpuOpsLocksRelease(&acquiredLocks); 6474 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6475 return NV_OK; 6476 6477 cleanup: 6478 *hClient = 0; 6479 *hDevice = 0; 6480 *hSubDevice = 0; 6481 _nvGpuOpsLocksRelease(&acquiredLocks); 6482 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6483 return status; 6484 } 6485 6486 NV_STATUS nvGpuOpsSetPageDirectory(struct gpuAddressSpace *vaSpace, 6487 NvU64 physAddress, 6488 unsigned numEntries, 6489 NvBool bVidMemAperture, NvU32 pasid) 6490 { 6491 NV_STATUS status; 6492 nvGpuOpsLockSet acquiredLocks; 6493 THREAD_STATE_NODE threadState; 6494 NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS params = {0}; 6495 OBJGPU *pGpu = NULL; 6496 OBJVASPACE *pVAS = NULL; 6497 RsClient *pClient; 6498 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 6499 6500 if (!vaSpace || !numEntries) 6501 return NV_ERR_INVALID_ARGUMENT; 6502 6503 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 6504 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, 6505 vaSpace->device->session->handle, 6506 &pClient, 6507 &acquiredLocks); 6508 if (status != NV_OK) 6509 { 6510 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 6511 return status; 6512 } 6513 6514 status = CliSetGpuContext(vaSpace->device->session->handle, 6515 vaSpace->device->handle, 6516 &pGpu, 6517 NULL); 6518 if 
(status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // make sure there is no PDB set if already externally owned
        if ((NULL != vaspaceGetPageDirBase(pVAS, pGpu)))
        {
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return NV_ERR_NOT_SUPPORTED;
        }

        // Stop all channels under the VAS
        status = nvGpuOpsDisableVaSpaceChannels(vaSpace);
        if (status != NV_OK)
        {
            //
            // If stopping any channels failed, reenable the channels which were
            // able to be stopped before bailing
            //
            nvGpuOpsEnableVaSpaceChannels(vaSpace);
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return status;
        }
    }

    params.physAddress = physAddress;
    params.numEntries = numEntries;
    params.hVASpace = vaSpace->handle;
    params.flags = bVidMemAperture ?
        DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS, _APERTURE, _VIDMEM) :
        DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS, _APERTURE, _SYSMEM_COH);
    params.flags |= DRF_DEF(0080, _CTRL_DMA_SET_PAGE_DIRECTORY_FLAGS,
                            _ALL_CHANNELS, _TRUE);
    params.pasid = pasid;

    // Always do Unicast by passing a non-zero subDeviceId (a zero subDeviceId
    // would mean broadcast to all subdevices)!
    params.subDeviceId = vaSpace->device->subdeviceInstance + 1;

    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->handle,
                             NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY,
                             &params,
                             sizeof(params));

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // Reschedule all channels in this VAS
        nvGpuOpsEnableVaSpaceChannels(vaSpace);
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsUnsetPageDirectory(struct gpuAddressSpace *vaSpace)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV0080_CTRL_DMA_UNSET_PAGE_DIRECTORY_PARAMS params = {0};
    OBJGPU *pGpu = NULL;
    OBJVASPACE *pVAS = NULL;
    RsClient *pClient;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!vaSpace)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      vaSpace->device->session->handle,
                                      &pClient,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = CliSetGpuContext(vaSpace->device->session->handle,
                              vaSpace->device->handle,
                              &pGpu,
                              NULL);
    if (status != NV_OK)
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               vaSpace->device->handle,
                                               vaSpace->handle,
                                               &pVAS);
    if ((status != NV_OK) || (pVAS == NULL))
    {
        _nvGpuOpsLocksRelease(&acquiredLocks);
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // Stop all channels under the VAS
        status = nvGpuOpsDisableVaSpaceChannels(vaSpace);
        if (status != NV_OK)
        {
            //
            // If stopping any channels failed, reenable the channels which were
            // able to be stopped before bailing
            //
            nvGpuOpsEnableVaSpaceChannels(vaSpace);
            _nvGpuOpsLocksRelease(&acquiredLocks);
            threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
            return status;
        }
    }

    params.hVASpace = vaSpace->handle;

    // Always do Unicast by passing a non-zero subDeviceId (a zero subDeviceId
    // would mean broadcast to all subdevices)!
    params.subDeviceId = vaSpace->device->subdeviceInstance + 1;

    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->device->handle,
                             NV0080_CTRL_CMD_DMA_UNSET_PAGE_DIRECTORY,
                             &params,
                             sizeof(params));

    if (vaspaceIsExternallyOwned(pVAS))
    {
        // Reschedule all channels in this VAS
        nvGpuOpsEnableVaSpaceChannels(vaSpace);
    }

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsGetGmmuFmt(struct gpuAddressSpace *vaSpace, void **pFmt)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    NV90F1_CTRL_VASPACE_GET_GMMU_FORMAT_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (!vaSpace || !pFmt)
        return NV_ERR_INVALID_ARGUMENT;

    if (!vaSpace->handle)
        return NV_ERR_INVALID_OBJECT_HANDLE;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      vaSpace->device->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    *pFmt = NULL;
    params.hSubDevice = vaSpace->device->subhandle;

    status = pRmApi->Control(pRmApi,
                             vaSpace->device->session->handle,
                             vaSpace->handle,
                             NV90F1_CTRL_CMD_VASPACE_GET_GMMU_FORMAT,
                             &params,
                             sizeof(params));
    if (status == NV_OK)
        *pFmt = (void *)params.pFmt;

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsInvalidateTlb(struct gpuAddressSpace *vaSpace)
{
    NV2080_CTRL_DMA_INVALIDATE_TLB_PARAMS params = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    if (!vaSpace)
        return NV_ERR_INVALID_ARGUMENT;

    params.hVASpace = vaSpace->handle;
    return pRmApi->Control(pRmApi,
                           vaSpace->device->session->handle,
                           vaSpace->device->subhandle,
                           NV2080_CTRL_CMD_DMA_INVALIDATE_TLB,
                           &params,
                           sizeof(params));
}

NV_STATUS nvGpuOpsGetFbInfo(struct gpuDevice *device, gpuFbInfo *fbInfo)
{
    NV_STATUS status;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;

    if (!device || !fbInfo)
        return NV_ERR_INVALID_ARGUMENT;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ,
                                      device->session->handle,
                                      NULL,
                                      &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    portMemCopy(fbInfo, sizeof(*fbInfo), &device->fbInfo, sizeof(*fbInfo));

    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return NV_OK;
}

NV_STATUS nvGpuOpsGetEccInfo(struct gpuDevice *device, gpuEccInfo *eccInfo)
{
    subDeviceDesc *rmSubDevice;

    if (!device || !eccInfo)
        return NV_ERR_INVALID_ARGUMENT;

    rmSubDevice = device->rmSubDevice;

    if (!rmSubDevice->bEccInitialized)
        return NV_ERR_NOT_SUPPORTED;

    eccInfo->eccMask = rmSubDevice->eccMask;
    eccInfo->eccOffset = rmSubDevice->eccOffset;
    eccInfo->eccReadLocation = rmSubDevice->eccReadLocation;
    eccInfo->bEccEnabled = rmSubDevice->bEccEnabled;
    eccInfo->eccErrorNotifier = &rmSubDevice->eccErrorNotifier;

    return NV_OK;
}

//
// Do not acquire the GPU locks, as all nvGpuOpsFreeDupedHandle() does is
// call pRmApi->Free(), which drops the GPU locks if acquired (and
// re-acquires them later).
//
NV_STATUS nvGpuOpsFreeDupedHandle(struct gpuDevice *device,
                                  NvHandle hPhysHandle)
{
    NV_STATUS status = NV_OK;
    nvGpuOpsLockSet acquiredLocks;
    THREAD_STATE_NODE threadState;
    Memory *pMemory = NULL;
    OBJGPU *pMappingGpu = NULL;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NvHandle hClient;
    NvHandle hSubDevice;

    if (!device)
        return NV_ERR_INVALID_ARGUMENT;

    hClient = device->session->handle;

    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
    status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_READ, hClient, NULL, 0, 0, 0, &acquiredLocks);
    if (status != NV_OK)
    {
        threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
        return status;
    }

    status = CliSetSubDeviceContext(device->session->handle,
                                    device->subhandle,
                                    &hSubDevice,
                                    &pMappingGpu);
    if (status != NV_OK)
        goto out;

    status = nvGpuOpsGetMemoryByHandle(device->session->handle,
                                       hPhysHandle,
                                       &pMemory);
    if (status != NV_OK)
        goto out;

    if (memdescIsSysmem(pMemory->pMemDesc))
    {
        // Release the mappings acquired in nvGpuOpsDupMemory().
        //
        // TODO: Bug 1811060: Add native support for this use-case in RM API.
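        //
        // (memdescMapIommu() in dupMemory() refcounts the IOMMU mapping, so
        // the unmap below only drops this function's reference; the
        // pRmApi->Free() at "out:" then releases the duped handle itself.)
        //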
        memdescUnmapIommu(pMemory->pMemDesc, pMappingGpu->busInfo.iovaspaceId);
    }

out:
    pRmApi->Free(pRmApi, device->session->handle, hPhysHandle);
    _nvGpuOpsLocksRelease(&acquiredLocks);
    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    return status;
}

NV_STATUS nvGpuOpsInitFaultInfo(struct gpuDevice *device,
                                gpuFaultInfo *pFaultInfo)
{
    struct gpuSession *session = device->session;
    NV_STATUS status = NV_OK;
    NVB069_ALLOCATION_PARAMETERS faultBufferAllocParams = {0};
    NVB069_CTRL_FAULTBUFFER_GET_SIZE_PARAMS sizeParams = {0};
    NVB069_CTRL_CMD_FAULTBUFFER_GET_REGISTER_MAPPINGS_PARAMS registermappingsParams = {0};
    void *bufferAddress = NULL;
    NvU32 faultBufferSize = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);

    pFaultInfo->faultBufferHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->subhandle,
                           &pFaultInfo->faultBufferHandle,
                           device->faultBufferClass,
                           &faultBufferAllocParams);
    if (status != NV_OK)
        goto cleanup;

    {
        // Get the Size of the fault buffer
        status = pRmApi->Control(pRmApi,
                                 session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVB069_CTRL_CMD_FAULTBUFFER_GET_SIZE,
                                 &sizeParams,
                                 sizeof(sizeParams));
        if (status != NV_OK)
            goto cleanup_fault_buffer;

        faultBufferSize = sizeParams.faultBufferSize;

        // Map the fault buffer pointer to CPU
        status = pRmApi->MapToCpu(pRmApi,
                                  session->handle,
                                  device->subhandle,
                                  pFaultInfo->faultBufferHandle,
                                  0,
                                  pFaultInfo->replayable.bufferSize,
                                  &bufferAddress,
                                  0);
        if (status != NV_OK)
            goto cleanup_fault_buffer;
    }

    if (isDeviceVoltaPlus(device))
    {
        NVC369_CTRL_MMU_FAULT_BUFFER_REGISTER_NON_REPLAY_BUF_PARAMS nonReplayableFaultsParams = {0};

        status = pRmApi->Control(pRmApi,
                                 session->handle,
                                 pFaultInfo->faultBufferHandle,
                                 NVC369_CTRL_CMD_MMU_FAULT_BUFFER_REGISTER_NON_REPLAY_BUF,
                                 &nonReplayableFaultsParams,
                                 sizeof(nonReplayableFaultsParams));
        if (status != NV_OK)
            goto cleanup_fault_buffer;

        pFaultInfo->nonReplayable.shadowBufferAddress = (void *)NvP64_VALUE(nonReplayableFaultsParams.pShadowBuffer);
        pFaultInfo->nonReplayable.shadowBufferContext = (void *)NvP64_VALUE(nonReplayableFaultsParams.pShadowBufferContext);
        pFaultInfo->nonReplayable.bufferSize = nonReplayableFaultsParams.bufferSize;
    }

    registermappingsParams.faultBufferType = NVB069_CTRL_FAULT_BUFFER_REPLAYABLE;
    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pFaultInfo->faultBufferHandle,
                             NVB069_CTRL_CMD_FAULTBUFFER_GET_REGISTER_MAPPINGS,
                             &registermappingsParams,
                             sizeof(registermappingsParams));
    if (status != NV_OK)
        goto cleanup_fault_buffer;

    pFaultInfo->replayable.pFaultBufferGet = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferGet;
    pFaultInfo->replayable.pFaultBufferPut = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferPut;
    pFaultInfo->replayable.pFaultBufferInfo = (NvU32*)(NvUPtr)registermappingsParams.pFaultBufferInfo;
    pFaultInfo->replayable.pPmcIntr = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntr;
    pFaultInfo->replayable.pPmcIntrEnSet = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntrEnSet;
    pFaultInfo->replayable.pPmcIntrEnClear = (NvU32*)(NvUPtr)registermappingsParams.pPmcIntrEnClear;
    pFaultInfo->replayable.replayableFaultMask = registermappingsParams.replayableFaultMask;
    pFaultInfo->replayable.pPrefetchCtrl = (NvU32*)(NvUPtr)registermappingsParams.pPrefetchCtrl;
    pFaultInfo->replayable.bufferSize = faultBufferSize;
    pFaultInfo->replayable.bufferAddress = bufferAddress;

    return NV_OK;

cleanup_fault_buffer:
    {
        gpuDeviceUnmapCpuFreeHandle(device,
                                    pFaultInfo->faultBufferHandle,
                                    pFaultInfo->replayable.bufferAddress,
                                    0);
    }
cleanup:
    portMemSet(pFaultInfo, 0, sizeof(*pFaultInfo));
    return status;
}

NV_STATUS nvGpuOpsInitAccessCntrInfo(struct gpuDevice *device,
                                     gpuAccessCntrInfo *pAccessCntrInfo)
{
    struct gpuSession *session = device->session;
    NV_STATUS status = NV_OK;
    NvU32 accessCntrBufferAllocParams = {0};
    NVC365_CTRL_ACCESS_CNTR_BUFFER_GET_SIZE_PARAMS sizeParams = {0};
    NVC365_CTRL_ACCESS_CNTR_BUFFER_GET_REGISTER_MAPPINGS_PARAMS registermappings;
    void *bufferAddress;
    NV0080_CTRL_BIF_GET_DMA_BASE_SYSMEM_ADDR_PARAMS getDmaBaseSysmemAddrParams = {0};
    RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL);
    OBJGPU *pGpu = NULL;

    // TODO: Acquired because CliSetGpuContext expects RMAPI lock. Necessary?
    status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS);
    if (status != NV_OK)
        return status;
    status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL);
    rmapiLockRelease();
    if (status != NV_OK)
        return status;

    pAccessCntrInfo->accessCntrBufferHandle = NV01_NULL_OBJECT;
    status = pRmApi->Alloc(pRmApi,
                           session->handle,
                           device->subhandle,
                           &pAccessCntrInfo->accessCntrBufferHandle,
                           device->accessCounterBufferClass,
                           &accessCntrBufferAllocParams);

    if (status != NV_OK)
        goto cleanup;

    status = pRmApi->MapToCpu(pRmApi, session->handle, device->subhandle, pAccessCntrInfo->accessCntrBufferHandle,
                              0, pAccessCntrInfo->bufferSize, &bufferAddress, 0);

    if (status != NV_OK)
        goto cleanup_access_ctr_buffer;

    pAccessCntrInfo->bufferAddress = bufferAddress;

    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pAccessCntrInfo->accessCntrBufferHandle,
                             NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_GET_SIZE,
                             &sizeParams,
                             sizeof(sizeParams));

    if (status != NV_OK)
        goto cleanup_access_ctr_buffer;

    pAccessCntrInfo->bufferSize = sizeParams.accessCntrBufferSize;

    status = pRmApi->Control(pRmApi,
                             session->handle,
                             pAccessCntrInfo->accessCntrBufferHandle,
                             NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_GET_REGISTER_MAPPINGS,
                             &registermappings,
                             sizeof(registermappings));
    if (status != NV_OK)
        goto cleanup_access_ctr_buffer;

    status = pRmApi->Control(pRmApi,
                             session->handle,
                             device->handle,
                             NV0080_CTRL_CMD_BIF_GET_DMA_BASE_SYSMEM_ADDR,
                             &getDmaBaseSysmemAddrParams,
                             sizeof(getDmaBaseSysmemAddrParams));
    if (status != NV_OK)
        goto cleanup_access_ctr_buffer;

    pAccessCntrInfo->pAccessCntrBufferGet = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferGet;
    pAccessCntrInfo->pAccessCntrBufferPut = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferPut;
    pAccessCntrInfo->pAccessCntrBufferFull = (NvU32*)(NvUPtr)registermappings.pAccessCntrBufferFull;
    pAccessCntrInfo->pHubIntr = (NvU32*)(NvUPtr)registermappings.pHubIntr;
    pAccessCntrInfo->pHubIntrEnSet =
(NvU32*)(NvUPtr)registermappings.pHubIntrEnSet; 7022 pAccessCntrInfo->pHubIntrEnClear = (NvU32*)(NvUPtr)registermappings.pHubIntrEnClear; 7023 pAccessCntrInfo->accessCounterMask = registermappings.accessCntrMask; 7024 pAccessCntrInfo->baseDmaSysmemAddr = getDmaBaseSysmemAddrParams.baseDmaSysmemAddr; 7025 7026 return NV_OK; 7027 7028 cleanup_access_ctr_buffer: 7029 gpuDeviceUnmapCpuFreeHandle(device, 7030 pAccessCntrInfo->accessCntrBufferHandle, 7031 pAccessCntrInfo->bufferAddress, 7032 0); 7033 cleanup: 7034 pAccessCntrInfo->accessCntrBufferHandle = 0; 7035 pAccessCntrInfo->bufferAddress = 0; 7036 return status; 7037 } 7038 7039 static NV_STATUS 7040 getAccessCounterGranularityValue(UVM_ACCESS_COUNTER_GRANULARITY granularity, NvU32 *value) 7041 { 7042 *value = 0; 7043 7044 switch (granularity) 7045 { 7046 case UVM_ACCESS_COUNTER_GRANULARITY_64K: 7047 *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_64K; 7048 break; 7049 case UVM_ACCESS_COUNTER_GRANULARITY_2M: 7050 *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_2M; 7051 break; 7052 case UVM_ACCESS_COUNTER_GRANULARITY_16M: 7053 *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_16M; 7054 break; 7055 case UVM_ACCESS_COUNTER_GRANULARITY_16G: 7056 *value = NVC365_CTRL_ACCESS_COUNTER_GRANULARITY_16G; 7057 break; 7058 default: 7059 return NV_ERR_INVALID_ARGUMENT; 7060 }; 7061 7062 return NV_OK; 7063 } 7064 7065 static NV_STATUS 7066 getAccessCounterLimitValue(UVM_ACCESS_COUNTER_USE_LIMIT limit, NvU32 *value) 7067 { 7068 *value = 0; 7069 7070 switch (limit) 7071 { 7072 case UVM_ACCESS_COUNTER_USE_LIMIT_NONE: 7073 *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_NONE; 7074 break; 7075 case UVM_ACCESS_COUNTER_USE_LIMIT_QTR: 7076 *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_QTR; 7077 break; 7078 case UVM_ACCESS_COUNTER_USE_LIMIT_HALF: 7079 *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_HALF; 7080 break; 7081 case UVM_ACCESS_COUNTER_USE_LIMIT_FULL: 7082 *value = NVC365_CTRL_ACCESS_COUNTER_USE_LIMIT_FULL; 7083 break; 7084 default: 7085 return NV_ERR_INVALID_ARGUMENT; 7086 }; 7087 7088 return NV_OK; 7089 } 7090 7091 NV_STATUS nvGpuOpsEnableAccessCntr(struct gpuDevice *device, 7092 gpuAccessCntrInfo *pAccessCntrInfo, 7093 gpuAccessCntrConfig *pAccessCntrConfig) 7094 { 7095 NV_STATUS status = NV_OK; 7096 NVC365_CTRL_ACCESS_CNTR_SET_CONFIG_PARAMS setConfigParams = { 0 }; 7097 NVC365_CTRL_ACCESS_CNTR_BUFFER_ENABLE_PARAMS enableParams = { 0 }; 7098 struct gpuSession *session = device->session; 7099 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 7100 7101 status = getAccessCounterGranularityValue(pAccessCntrConfig->mimcGranularity, &setConfigParams.mimcGranularity); 7102 if (status != NV_OK) 7103 return status; 7104 7105 status = getAccessCounterGranularityValue(pAccessCntrConfig->momcGranularity, &setConfigParams.momcGranularity); 7106 if (status != NV_OK) 7107 return status; 7108 7109 status = getAccessCounterLimitValue(pAccessCntrConfig->mimcUseLimit, &setConfigParams.mimcLimit); 7110 if (status != NV_OK) 7111 return status; 7112 7113 status = getAccessCounterLimitValue(pAccessCntrConfig->momcUseLimit, &setConfigParams.momcLimit); 7114 if (status != NV_OK) 7115 return status; 7116 7117 setConfigParams.threshold = pAccessCntrConfig->threshold; 7118 setConfigParams.cmd = NVC365_CTRL_ACCESS_COUNTER_SET_MIMC_GRANULARITY | 7119 NVC365_CTRL_ACCESS_COUNTER_SET_MOMC_GRANULARITY | 7120 NVC365_CTRL_ACCESS_COUNTER_SET_MIMC_LIMIT | 7121 NVC365_CTRL_ACCESS_COUNTER_SET_MOMC_LIMIT | 7122 NVC365_CTRL_ACCESS_COUNTER_SET_THRESHOLD; 7123 7124 status = 
pRmApi->Control(pRmApi, 7125 session->handle, 7126 pAccessCntrInfo->accessCntrBufferHandle, 7127 NVC365_CTRL_CMD_ACCESS_CNTR_SET_CONFIG, 7128 &setConfigParams, 7129 sizeof(setConfigParams)); 7130 if (status != NV_OK) 7131 return status; 7132 7133 enableParams.intrOwnership = NVC365_CTRL_ACCESS_COUNTER_INTERRUPT_OWNERSHIP_NOT_RM; 7134 enableParams.enable = NV_TRUE; 7135 7136 status = pRmApi->Control(pRmApi, 7137 session->handle, 7138 pAccessCntrInfo->accessCntrBufferHandle, 7139 NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_ENABLE, 7140 &enableParams, 7141 sizeof(enableParams)); 7142 return status; 7143 } 7144 7145 NV_STATUS nvGpuOpsDisableAccessCntr(struct gpuDevice *device, 7146 gpuAccessCntrInfo *pAccessCntrInfo) 7147 { 7148 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 7149 NVC365_CTRL_ACCESS_CNTR_BUFFER_ENABLE_PARAMS enableParams = { 0 }; 7150 7151 enableParams.intrOwnership = NVC365_CTRL_ACCESS_COUNTER_INTERRUPT_OWNERSHIP_RM; 7152 enableParams.enable = NV_FALSE; 7153 return pRmApi->Control(pRmApi, 7154 device->session->handle, 7155 pAccessCntrInfo->accessCntrBufferHandle, 7156 NVC365_CTRL_CMD_ACCESS_CNTR_BUFFER_ENABLE, 7157 &enableParams, 7158 sizeof(enableParams)); 7159 } 7160 7161 NV_STATUS nvGpuOpsDestroyAccessCntrInfo(struct gpuDevice *device, 7162 gpuAccessCntrInfo *pAccessCntrInfo) 7163 { 7164 gpuDeviceUnmapCpuFreeHandle(device, 7165 pAccessCntrInfo->accessCntrBufferHandle, 7166 pAccessCntrInfo->bufferAddress, 7167 0); 7168 portMemSet(pAccessCntrInfo, 0, sizeof(gpuAccessCntrInfo)); 7169 return NV_OK; 7170 } 7171 7172 NV_STATUS nvGpuOpsDestroyFaultInfo(struct gpuDevice *device, 7173 gpuFaultInfo *pFaultInfo) 7174 { 7175 NV_STATUS status = NV_OK; 7176 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 7177 7178 if (pFaultInfo->faultBufferHandle && isDeviceVoltaPlus(device)) 7179 { 7180 NVC369_CTRL_MMU_FAULT_BUFFER_UNREGISTER_NON_REPLAY_BUF_PARAMS params = {0}; 7181 7182 params.pShadowBuffer = NV_PTR_TO_NvP64(pFaultInfo->nonReplayable.shadowBufferAddress); 7183 7184 status = pRmApi->Control(pRmApi, 7185 device->session->handle, 7186 pFaultInfo->faultBufferHandle, 7187 NVC369_CTRL_CMD_MMU_FAULT_BUFFER_UNREGISTER_NON_REPLAY_BUF, 7188 ¶ms, 7189 sizeof(params)); 7190 NV_ASSERT(status == NV_OK); 7191 } 7192 7193 { 7194 gpuDeviceUnmapCpuFreeHandle(device, 7195 pFaultInfo->faultBufferHandle, 7196 pFaultInfo->replayable.bufferAddress, 7197 0); 7198 } 7199 7200 portMemSet(pFaultInfo, 0, sizeof(gpuFaultInfo)); 7201 return status; 7202 } 7203 7204 NV_STATUS nvGpuOpsHasPendingNonReplayableFaults(gpuFaultInfo *pFaultInfo, 7205 NvBool *hasPendingFaults) 7206 { 7207 GMMU_SHADOW_FAULT_BUF *pQueue = 7208 (GMMU_SHADOW_FAULT_BUF *) pFaultInfo->nonReplayable.shadowBufferAddress; 7209 7210 if (!pQueue || !hasPendingFaults) 7211 return NV_ERR_INVALID_ARGUMENT; 7212 7213 *hasPendingFaults = !queueIsEmpty(pQueue); 7214 7215 return NV_OK; 7216 } 7217 7218 NV_STATUS nvGpuOpsGetNonReplayableFaults(gpuFaultInfo *pFaultInfo, 7219 void *faultBuffer, 7220 NvU32 *numFaults) 7221 { 7222 GMMU_SHADOW_FAULT_BUF *pQueue = 7223 (GMMU_SHADOW_FAULT_BUF *) pFaultInfo->nonReplayable.shadowBufferAddress; 7224 QueueContext *pQueueCtx = 7225 (QueueContext *) pFaultInfo->nonReplayable.shadowBufferContext; 7226 7227 if (!pQueue || !faultBuffer || !numFaults) 7228 return NV_ERR_INVALID_ARGUMENT; 7229 7230 *numFaults = 0; 7231 7232 // Copy all faults in the client shadow fault buffer to the given buffer 7233 while (queuePopAndCopyNonManaged(pQueue, pQueueCtx, faultBuffer)) 7234 { 7235 ++(*numFaults); 7236 faultBuffer = 
(char *)faultBuffer + NVC369_BUF_SIZE; 7237 } 7238 7239 return NV_OK; 7240 } 7241 7242 NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device) 7243 { 7244 NV_STATUS status; 7245 NvHandle hClient = device->session->handle; 7246 RsClient *pClient; 7247 Device *pDevice; 7248 OBJGPU *pGpu; 7249 KernelGmmu *pKernelGmmu; 7250 7251 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 7252 if (status != NV_OK) 7253 return NV_ERR_INVALID_ARGUMENT; 7254 7255 status = deviceGetByHandle(pClient, device->handle, &pDevice); 7256 if (status != NV_OK) 7257 return NV_ERR_INVALID_ARGUMENT; 7258 7259 GPU_RES_SET_THREAD_BC_STATE(pDevice); 7260 7261 pGpu = GPU_RES_GET_GPU(pDevice); 7262 pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); 7263 7264 return kgmmuIssueReplayableFaultBufferFlush_HAL(pGpu, pKernelGmmu); 7265 } 7266 7267 static NV_STATUS nvGpuOpsVerifyChannel(struct gpuAddressSpace *vaSpace, 7268 NvHandle hClient, 7269 NvHandle hKernelChannel, 7270 OBJGPU **pGpu, 7271 KernelChannel **ppKernelChannel) 7272 { 7273 NV_STATUS status = NV_OK; 7274 NvHandle hDevice, hSubDevice; 7275 OBJVASPACE *pVAS = NULL; 7276 OBJGPU *pVaSpaceGpu; 7277 RsClient *pClient; 7278 7279 NV_ASSERT_OR_RETURN(ppKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT); 7280 7281 status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient); 7282 if (status != NV_OK) 7283 return status; 7284 7285 status = vaspaceGetByHandleOrDeviceDefault(pClient, 7286 vaSpace->device->handle, 7287 vaSpace->handle, 7288 &pVAS); 7289 if (status != NV_OK) 7290 return status; 7291 7292 status = CliGetKernelChannel(hClient, hKernelChannel, ppKernelChannel); 7293 if (status != NV_OK) 7294 return NV_ERR_INVALID_OBJECT_HANDLE; 7295 7296 hDevice = RES_GET_HANDLE(GPU_RES_GET_DEVICE(*ppKernelChannel)); 7297 status = CliSetGpuContext(hClient, hDevice, pGpu, NULL); 7298 if (status != NV_OK) 7299 return status; 7300 7301 if ((*ppKernelChannel)->pVAS != pVAS) 7302 { 7303 if (CliSetGpuContext(vaSpace->device->session->handle, 7304 vaSpace->device->handle, 7305 &pVaSpaceGpu, 7306 NULL) == NV_OK && pVaSpaceGpu != *pGpu) 7307 return NV_ERR_OTHER_DEVICE_FOUND; 7308 7309 return NV_ERR_INVALID_CHANNEL; 7310 } 7311 7312 // In SLI config, RM's internal allocations such as channel instance 7313 // are tracked with a memdesc per subdevice. Hence, get the correct pGpu.
7314 status = CliSetSubDeviceContext(vaSpace->device->session->handle, 7315 vaSpace->device->subhandle, 7316 &hSubDevice, 7317 pGpu); 7318 if (status != NV_OK) 7319 return status; 7320 7321 return NV_OK; 7322 } 7323 7324 static NV_STATUS nvGpuOpsGetChannelEngineType(OBJGPU *pGpu, 7325 KernelChannel *pKernelChannel, 7326 UVM_GPU_CHANNEL_ENGINE_TYPE *engineType) 7327 { 7328 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); 7329 NvU32 engDesc; 7330 RM_ENGINE_TYPE rmEngineType; 7331 NV_STATUS status; 7332 7333 NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT); 7334 7335 status = kchannelGetEngine_HAL(pGpu, pKernelChannel, &engDesc); 7336 if (status != NV_OK) 7337 return status; 7338 7339 status = kfifoEngineInfoXlate_HAL(pGpu, 7340 pKernelFifo, 7341 ENGINE_INFO_TYPE_ENG_DESC, 7342 engDesc, 7343 ENGINE_INFO_TYPE_RM_ENGINE_TYPE, 7344 (NvU32 *)&rmEngineType); 7345 if (status != NV_OK) 7346 return status; 7347 7348 if (RM_ENGINE_TYPE_IS_GR(rmEngineType)) 7349 *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_GR; 7350 else if (rmEngineType == RM_ENGINE_TYPE_SEC2) 7351 *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2; 7352 else 7353 *engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE; 7354 7355 return NV_OK; 7356 } 7357 7358 static void _memdescRetain(MEMORY_DESCRIPTOR *pMemDesc) 7359 { 7360 if (pMemDesc->Allocated > 0) 7361 { 7362 pMemDesc->Allocated++; 7363 } 7364 7365 memdescAddRef(pMemDesc); 7366 } 7367 7368 static NV_STATUS nvGpuOpsGetChannelInstanceMemInfo(gpuRetainedChannel *retainedChannel, 7369 gpuChannelInstanceInfo *channelInstanceInfo) 7370 { 7371 PMEMORY_DESCRIPTOR pMemDesc = NULL; 7372 NV2080_CTRL_FIFO_MEM_INFO instanceMemInfo; 7373 NV_STATUS status; 7374 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(retainedChannel->pGpu); 7375 KernelChannel *pKernelChannel = NULL; 7376 CHID_MGR *pChidMgr = kfifoGetChidMgr(retainedChannel->pGpu, 7377 pKernelFifo, 7378 retainedChannel->runlistId); 7379 7380 pKernelChannel = kfifoChidMgrGetKernelChannel(retainedChannel->pGpu, 7381 pKernelFifo, 7382 pChidMgr, 7383 channelInstanceInfo->chId); 7384 NV_CHECK_OR_RETURN(LEVEL_ERROR, pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL); 7385 7386 status = kfifoChannelGetFifoContextMemDesc_HAL(retainedChannel->pGpu, 7387 pKernelFifo, 7388 pKernelChannel, 7389 FIFO_CTX_INST_BLOCK, 7390 &pMemDesc); 7391 if (status != NV_OK) 7392 return status; 7393 7394 pMemDesc = memdescGetMemDescFromGpu(pMemDesc, retainedChannel->pGpu); 7395 7396 kfifoFillMemInfo(pKernelFifo, pMemDesc, &instanceMemInfo); 7397 7398 if (instanceMemInfo.aperture == NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_INVALID) 7399 return NV_ERR_INVALID_OBJECT_HANDLE; 7400 7401 retainedChannel->instanceMemDesc = pMemDesc; 7402 channelInstanceInfo->base = instanceMemInfo.base; 7403 channelInstanceInfo->sysmem = (instanceMemInfo.aperture != NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_VIDMEM); 7404 7405 return NV_OK; 7406 } 7407 7408 static NV_STATUS nvGpuOpsGetChannelTsgInfo(gpuRetainedChannel *retainedChannel, 7409 gpuChannelInstanceInfo *channelInstanceInfo, 7410 KernelChannel *pKernelChannel) 7411 { 7412 OBJGPU *pGpu = retainedChannel->pGpu; 7413 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); 7414 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 7415 KernelChannelGroup *pKernelChannelGroup; 7416 NvHandle hDupTsg; 7417 NvU32 tsgMaxSubctxCount; 7418 NV_STATUS status; 7419 NvBool bLockAcquire = NV_FALSE; 7420 7421 NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT); 7422 pKernelChannelGroup = 
pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup; 7423 NV_ASSERT_OR_RETURN(pKernelChannelGroup != NULL, NV_ERR_INVALID_STATE); 7424 7425 if (rmGpuLockIsOwner()) 7426 { 7427 rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL); 7428 bLockAcquire = NV_TRUE; 7429 } 7430 // Take a reference on the TSG ID by duping the TSG. Note that this is 7431 // the easy way out because we dup more than strictly necessary: every 7432 // channel registered under the same TSG will re-dup that TSG. In 7433 // practice there's very little overhead to re-duping the TSG for each 7434 // channel. 7435 hDupTsg = NV01_NULL_OBJECT; 7436 status = pRmApi->DupObject(pRmApi, 7437 retainedChannel->session->handle, 7438 retainedChannel->rmDevice->deviceHandle, 7439 &hDupTsg, 7440 RES_GET_CLIENT_HANDLE(pKernelChannel), 7441 RES_GET_HANDLE(pKernelChannel->pKernelChannelGroupApi), 7442 NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE); 7443 if (status != NV_OK) 7444 { 7445 if (bLockAcquire) 7446 { 7447 NV_ASSERT_OK_OR_RETURN(rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GPU_OPS)); 7448 } 7449 return status; 7450 } 7451 7452 if (bLockAcquire) 7453 { 7454 if ((status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GPU_OPS)) != NV_OK) 7455 { 7456 pRmApi->Free(pRmApi, retainedChannel->session->handle, hDupTsg); 7457 return status; 7458 } 7459 } 7460 7461 tsgMaxSubctxCount = kfifoChannelGroupGetLocalMaxSubcontext_HAL( 7462 pGpu, pKernelFifo, 7463 pKernelChannelGroup, 7464 pKernelChannelGroup->bLegacyMode); 7465 7466 channelInstanceInfo->bTsgChannel = NV_TRUE; 7467 channelInstanceInfo->tsgId = pKernelChannelGroup->grpID; 7468 channelInstanceInfo->tsgMaxSubctxCount = tsgMaxSubctxCount; 7469 7470 retainedChannel->hDupTsg = hDupTsg; 7471 7472 return NV_OK; 7473 } 7474 7475 7476 static NV_STATUS nvGpuOpsGetChannelSmcInfo(gpuRetainedChannel *retainedChannel, 7477 gpuChannelInstanceInfo *channelInstanceInfo, 7478 KernelChannel *pKernelChannel, 7479 struct gpuDevice *device) 7480 { 7481 channelInstanceInfo->smcEngineId = 0; 7482 channelInstanceInfo->smcEngineVeIdOffset = 0; 7483 7484 NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT); 7485 7486 if (isDeviceAmperePlus(device) && retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR) 7487 { 7488 OBJGPU *pGpu = retainedChannel->pGpu; 7489 7490 if (IS_MIG_IN_USE(pGpu)) 7491 { 7492 NvU32 grFaultId; 7493 NvU32 grMmuFaultEngId; 7494 7495 const NvU32 grIdx = RM_ENGINE_TYPE_GR_IDX(kchannelGetEngineType(pKernelChannel)); 7496 7497 NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, 7498 GPU_GET_KERNEL_FIFO(pGpu), 7499 ENGINE_INFO_TYPE_ENG_DESC, 7500 ENG_GR(grIdx), 7501 ENGINE_INFO_TYPE_MMU_FAULT_ID, 7502 &grFaultId)); 7503 7504 grMmuFaultEngId = kgmmuGetGraphicsEngineId_HAL(GPU_GET_KERNEL_GMMU(pGpu)); 7505 NV_ASSERT(grFaultId >= grMmuFaultEngId); 7506 7507 channelInstanceInfo->smcEngineId = grIdx; 7508 channelInstanceInfo->smcEngineVeIdOffset = grFaultId - grMmuFaultEngId; 7509 } 7510 } 7511 7512 return NV_OK; 7513 } 7514 7515 7516 static void nvGpuOpsGetChannelSubctxInfo(gpuRetainedChannel *retainedChannel, 7517 gpuChannelInstanceInfo *channelInstanceInfo, 7518 KernelChannel *pKernelChannel) 7519 { 7520 OBJGPU *pGpu = retainedChannel->pGpu; 7521 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); 7522 NvHandle hDupKernelCtxShare = NV01_NULL_OBJECT; 7523 RM_API *pRmApi; 7524 NV_STATUS status = NV_OK; 7525 7526 NV_ASSERT_OR_RETURN_VOID(pKernelChannel != NULL); 7527 7528 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 7529 7530 // 
Subcontexts are parented by the TSG, so we must have a reference on the 7531 // TSG in order to retain the subcontext. The exception is if this channel 7532 // was allocated without a TSG, in which case RM creates an internal TSG and 7533 // subcontext which we shouldn't attempt to retain. In that case, we will 7534 // have skipped duping the TSG earlier and hDupTsg == 0. 7535 // 7536 // pKernelChannelGroup->bLegacyMode means that the subcontext was 7537 // created internally by RM, not by the user. 7538 if (kfifoIsSubcontextSupported(pKernelFifo) && 7539 pKernelChannel->pKernelCtxShareApi && 7540 retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR && 7541 retainedChannel->hDupTsg && 7542 !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bLegacyMode) 7543 { 7544 7545 status = pRmApi->DupObject(pRmApi, 7546 retainedChannel->session->handle, 7547 retainedChannel->hDupTsg, 7548 &hDupKernelCtxShare, 7549 RES_GET_CLIENT_HANDLE(pKernelChannel), 7550 RES_GET_HANDLE(pKernelChannel->pKernelCtxShareApi), 7551 NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE); 7552 7553 NV_ASSERT(status == NV_OK); 7554 retainedChannel->hDupKernelCtxShare = hDupKernelCtxShare; 7555 7556 // Faults report the VEID (aka subcontext ID), so we need to retain the 7557 // subcontext ID. We do that by taking a reference on the entire 7558 // subcontext object. 7559 // 7560 // pKernelCtxShare->pShareData is a pointer to the broadcast kctxshare data object. 7561 // We get the VEID for this retained channel's GPU through that. 7562 7563 // Possibly better to go through the handle we just duped for this? Not sure how to do so. 7564 channelInstanceInfo->subctxId = pKernelChannel->pKernelCtxShareApi->pShareData->subctxId; 7565 channelInstanceInfo->bInSubctx = NV_TRUE; 7566 7567 // Make sure that we saw our GPU 7568 NV_ASSERT(channelInstanceInfo->bInSubctx); 7569 NV_ASSERT(channelInstanceInfo->subctxId < channelInstanceInfo->tsgMaxSubctxCount); 7570 } 7571 else 7572 { 7573 channelInstanceInfo->subctxId = 0; 7574 channelInstanceInfo->bInSubctx = NV_FALSE; 7575 } 7576 } 7577 7578 // This function verifies that the instance pointer of the retainedChannel still 7579 // refers to a valid channel.
7580 static NV_STATUS nvGpuOpsGetChannelData(gpuRetainedChannel *retainedChannel, 7581 KernelChannel **ppKernelChannel) 7582 { 7583 NV2080_CTRL_FIFO_MEM_INFO instanceMemInfo; 7584 INST_BLOCK_DESC inst; 7585 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(retainedChannel->pGpu); 7586 7587 kfifoFillMemInfo(pKernelFifo, retainedChannel->instanceMemDesc, &instanceMemInfo); 7588 7589 switch (instanceMemInfo.aperture) 7590 { 7591 case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_SYSMEM_COH: 7592 inst.aperture = NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY; 7593 break; 7594 case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_SYSMEM_NCOH: 7595 inst.aperture = NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY; 7596 break; 7597 case NV2080_CTRL_CMD_FIFO_GET_CHANNEL_MEM_APERTURE_VIDMEM: 7598 inst.aperture = NV_MMU_PTE_APERTURE_VIDEO_MEMORY; 7599 break; 7600 default: 7601 return NV_ERR_INVALID_CHANNEL; 7602 } 7603 7604 inst.address = instanceMemInfo.base; 7605 inst.gfid = GPU_GFID_PF; // Run in VF context w/o GFID 7606 7607 return kfifoConvertInstToKernelChannel_HAL(retainedChannel->pGpu, 7608 pKernelFifo, 7609 &inst, 7610 ppKernelChannel); 7611 } 7612 7613 NV_STATUS nvGpuOpsRetainChannel(struct gpuAddressSpace *vaSpace, 7614 NvHandle hClient, 7615 NvHandle hKernelChannel, 7616 gpuRetainedChannel **retainedChannel, 7617 gpuChannelInstanceInfo *channelInstanceInfo) 7618 { 7619 nvGpuOpsLockSet acquiredLocks; 7620 THREAD_STATE_NODE threadState; 7621 KernelChannel *pKernelChannel = NULL; 7622 OBJGPU *pGpu = NULL; 7623 gpuRetainedChannel *channel = NULL; 7624 NV_STATUS status = NV_OK; 7625 struct gpuDevice *device; 7626 subDeviceDesc *rmSubDevice; 7627 NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS params = {0}; 7628 NV_UVM_CHANNEL_RETAINER_ALLOC_PARAMS channelRetainerParams = {0}; 7629 RM_API *pRmApi = NULL; 7630 NvHandle hChannelParent = 0; 7631 7632 if (!vaSpace || !channelInstanceInfo) 7633 return NV_ERR_INVALID_ARGUMENT; 7634 7635 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 7636 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, 7637 hClient, 7638 NULL, 7639 &acquiredLocks); 7640 if (status != NV_OK) 7641 { 7642 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 7643 return status; 7644 } 7645 7646 device = vaSpace->device; 7647 rmSubDevice = device->rmSubDevice; 7648 7649 status = nvGpuOpsVerifyChannel(vaSpace, hClient, hKernelChannel, &pGpu, 7650 &pKernelChannel); 7651 if (status != NV_OK) 7652 { 7653 _nvGpuOpsLocksRelease(&acquiredLocks); 7654 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 7655 return status; 7656 } 7657 7658 portMemSet(channelInstanceInfo, 0, sizeof(*channelInstanceInfo)); 7659 7660 channel = portMemAllocNonPaged(sizeof(*channel)); 7661 if (channel == NULL) 7662 { 7663 status = NV_ERR_NO_MEMORY; 7664 _nvGpuOpsLocksRelease(&acquiredLocks); 7665 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 7666 return status; 7667 } 7668 7669 portMemSet(channel, 0, sizeof(*channel)); 7670 channel->device = device; 7671 channel->rmDevice = device->rmDevice; 7672 channel->rmSubDevice = rmSubDevice; 7673 channel->session = device->session; 7674 channel->pGpu = pGpu; 7675 7676 channelInstanceInfo->runlistId = kchannelGetRunlistId(pKernelChannel); 7677 channelInstanceInfo->chId = pKernelChannel->ChID; 7678 channel->chId = pKernelChannel->ChID; 7679 channel->runlistId = kchannelGetRunlistId(pKernelChannel); 7680 7681 status = nvGpuOpsGetChannelEngineType(pGpu, pKernelChannel, &channel->channelEngineType); 7682 if (status != NV_OK) 7683 goto error; 7684 7685 status 
= nvGpuOpsGetChannelInstanceMemInfo(channel, channelInstanceInfo); 7686 if (status != NV_OK) 7687 goto error; 7688 7689 status = nvGpuOpsGetChannelTsgInfo(channel, channelInstanceInfo, 7690 pKernelChannel); 7691 if (status != NV_OK) 7692 goto error; 7693 7694 status = nvGpuOpsGetChannelSmcInfo(channel, channelInstanceInfo, 7695 pKernelChannel, device); 7696 if (status != NV_OK) 7697 goto error; 7698 7699 nvGpuOpsGetChannelSubctxInfo(channel, channelInstanceInfo, pKernelChannel); 7700 7701 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 7702 7703 if (channelInstanceInfo->bTsgChannel) 7704 hChannelParent = channel->hDupTsg; 7705 else 7706 hChannelParent = channel->rmDevice->deviceHandle; 7707 7708 channelRetainerParams.hClient = hClient; 7709 channelRetainerParams.hChannel = hKernelChannel; 7710 7711 NV_PRINTF(LEVEL_INFO, "%s: Channel duping is not supported. Fall back to UVM_CHANNEL_RETAINER\n", 7712 __FUNCTION__); 7713 7714 status = pRmApi->Alloc(pRmApi, 7715 device->session->handle, 7716 hChannelParent, 7717 &channel->hChannelRetainer, 7718 UVM_CHANNEL_RETAINER, 7719 &channelRetainerParams); 7720 if (status != NV_OK) 7721 goto error; 7722 7723 // Now get the token for submission on the given channel. 7724 status = pRmApi->Control(pRmApi, 7725 hClient, 7726 hKernelChannel, 7727 NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN, 7728 &params, 7729 sizeof(params)); 7730 7731 if (status != NV_OK) 7732 { 7733 NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__, 7734 __LINE__, nvstatusToString(status)); 7735 goto error; 7736 } 7737 7738 // On Turing+ GPUs, the CLEAR_FAULTED method requires an RM-provided handle 7739 // to identify the channel. 7740 // 7741 // TODO: Bug 1905719: We are currently using the channel handle that is 7742 // used for the work submission usermode doorbell mechanism. However, the 7743 // values may differ in the future, so we may need a dedicated API to get 7744 // the channel handle for CLEAR_FAULTED in RM.
7745 channelInstanceInfo->clearFaultedToken = params.workSubmitToken; 7746 7747 if (isDeviceAmperePlus(device)) 7748 { 7749 void *bar0Mapping = gpuBar0BaseAddress(pGpu); 7750 NvU32 chramPri; 7751 NvU32 runlistPri; 7752 7753 NV_ASSERT_OK_OR_GOTO(status, kfifoEngineInfoXlate_HAL(pGpu, 7754 GPU_GET_KERNEL_FIFO(pGpu), 7755 ENGINE_INFO_TYPE_RUNLIST, 7756 kchannelGetRunlistId(pKernelChannel), 7757 ENGINE_INFO_TYPE_CHRAM_PRI_BASE, 7758 &chramPri), error); 7759 7760 chramPri += NV_CHRAM_CHANNEL(pKernelChannel->ChID); 7761 7762 channelInstanceInfo->pChramChannelRegister = (NvU32 *)((NvU8*)bar0Mapping + chramPri); 7763 7764 NV_ASSERT_OK_OR_GOTO(status, kfifoEngineInfoXlate_HAL(pGpu, 7765 GPU_GET_KERNEL_FIFO(pGpu), 7766 ENGINE_INFO_TYPE_RUNLIST, 7767 kchannelGetRunlistId(pKernelChannel), 7768 ENGINE_INFO_TYPE_RUNLIST_PRI_BASE, 7769 &runlistPri), error); 7770 7771 channelInstanceInfo->pRunlistPRIBaseRegister = (NvU32 *)((NvU8*)bar0Mapping + runlistPri); 7772 } 7773 7774 status = _nvGpuOpsRetainChannelResources(device, 7775 hClient, 7776 hKernelChannel, 7777 channel, 7778 channelInstanceInfo); 7779 if (status != NV_OK) 7780 { 7781 NV_PRINTF(LEVEL_ERROR, "%s:%d: %s\n", __FUNCTION__, 7782 __LINE__, nvstatusToString(status)); 7783 goto error; 7784 } 7785 7786 channelInstanceInfo->channelEngineType = channel->channelEngineType; 7787 *retainedChannel = channel; 7788 7789 _nvGpuOpsLocksRelease(&acquiredLocks); 7790 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 7791 return NV_OK; 7792 7793 error: 7794 _nvGpuOpsReleaseChannel(channel); 7795 _nvGpuOpsLocksRelease(&acquiredLocks); 7796 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 7797 return status; 7798 } 7799 7800 static void _nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel) 7801 { 7802 NV_STATUS status = NV_OK; 7803 struct gpuSession *session; 7804 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 7805 7806 if (!retainedChannel) 7807 return; 7808 7809 _nvGpuOpsReleaseChannelResources(retainedChannel); 7810 7811 session = retainedChannel->session; 7812 NV_ASSERT(session); 7813 7814 if (retainedChannel->hChannelRetainer) 7815 { 7816 status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hChannelRetainer); 7817 NV_ASSERT(status == NV_OK); 7818 } 7819 7820 // Release the subcontext if we retained it. Subcontexts are parented by the 7821 // TSG, so we must release the subcontext before releasing the TSG. 7822 if (retainedChannel->hDupKernelCtxShare) 7823 { 7824 NV_ASSERT(retainedChannel->hDupTsg); 7825 status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hDupKernelCtxShare); 7826 NV_ASSERT(status == NV_OK); 7827 } 7828 7829 if (retainedChannel->hDupTsg) 7830 { 7831 status = pRmApi->Free(pRmApi, session->handle, retainedChannel->hDupTsg); 7832 NV_ASSERT(status == NV_OK); 7833 } 7834 7835 7836 // Releasing the channel ID can only fail if the ID is no longer valid, 7837 // which indicates a bug elsewhere. 7838 NV_ASSERT(status == NV_OK); 7839 7840 portMemFree(retainedChannel); 7841 } 7842 7843 void nvGpuOpsReleaseChannel(gpuRetainedChannel *retainedChannel) 7844 { 7845 nvGpuOpsLockSet acquiredLocks; 7846 THREAD_STATE_NODE threadState; 7847 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 7848 // TODO can we lock fewer GPUS with Channel information? 
7849 if (_nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, 7850 retainedChannel->session->handle, 7851 NULL, 7852 &acquiredLocks) != NV_OK) 7853 { 7854 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 7855 return; 7856 } 7857 _nvGpuOpsReleaseChannel(retainedChannel); 7858 _nvGpuOpsLocksRelease(&acquiredLocks); 7859 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 7860 } 7861 7862 static void 7863 _shadowMemdescDestroy(gpuRetainedChannel *retainedChannel, 7864 MEMORY_DESCRIPTOR *pMemDesc) 7865 { 7866 if (pMemDesc->RefCount == 1) 7867 { 7868 mapRemoveByKey(&retainedChannel->device->kern2PhysDescrMap, (NvU64) pMemDesc); 7869 } 7870 7871 memdescDestroy(pMemDesc); 7872 } 7873 7874 NvBool _memDescFindAndRetain(gpuRetainedChannel *retainedChannel, 7875 MEMORY_DESCRIPTOR *pBufferHandle, 7876 MEMORY_DESCRIPTOR **ppMemDesc) 7877 { 7878 MEMORY_DESCRIPTOR *pMemDesc = NULL; 7879 MemdescMapIter iter = mapIterAll(&retainedChannel->device->kern2PhysDescrMap); 7880 while (mapIterNext(&iter)) 7881 { 7882 MEMORY_DESCRIPTOR **ppValue = iter.pValue; 7883 if (pBufferHandle == *ppValue) 7884 { 7885 NvU64 key = mapKey(&retainedChannel->device->kern2PhysDescrMap, ppValue); 7886 pMemDesc = (MEMORY_DESCRIPTOR *) key; 7887 break; 7888 } 7889 } 7890 7891 if (pMemDesc != NULL) 7892 { 7893 _memdescRetain(pMemDesc); 7894 *ppMemDesc = pMemDesc; 7895 return NV_TRUE; 7896 } 7897 return NV_FALSE; 7898 } 7899 7900 static NV_STATUS 7901 _shadowMemdescCreateFlcn(gpuRetainedChannel *retainedChannel, 7902 NV2080_CTRL_FLCN_GET_CTX_BUFFER_INFO_PARAMS *pCtxBufferInfo, 7903 MEMORY_DESCRIPTOR **ppMemDesc) 7904 { 7905 MEMORY_DESCRIPTOR *pMemDesc = NULL; 7906 MEMORY_DESCRIPTOR *pBufferHandle = (MEMORY_DESCRIPTOR *) pCtxBufferInfo->bufferHandle; 7907 NV_STATUS status = NV_OK; 7908 7909 NV_ASSERT_OR_RETURN(pCtxBufferInfo->bIsContigous, NV_ERR_INVALID_STATE); 7910 7911 if (_memDescFindAndRetain(retainedChannel, pBufferHandle, ppMemDesc)) 7912 return status; 7913 7914 status = memdescCreate(&pMemDesc, 7915 retainedChannel->pGpu, 7916 pCtxBufferInfo->size, 7917 pCtxBufferInfo->alignment, 7918 pCtxBufferInfo->bIsContigous, 7919 pCtxBufferInfo->aperture, 7920 NV_MEMORY_CACHED, 7921 MEMDESC_FLAGS_NONE 7922 ); 7923 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status); 7924 7925 memdescSetPageSize(pMemDesc, 0, pCtxBufferInfo->pageSize); 7926 7927 memdescDescribe(pMemDesc, pCtxBufferInfo->aperture, pCtxBufferInfo->physAddr, pCtxBufferInfo->size); 7928 7929 (void) mapInsertValue(&retainedChannel->device->kern2PhysDescrMap, 7930 (NvU64) pMemDesc, 7931 &pBufferHandle); 7932 *ppMemDesc = pMemDesc; 7933 7934 return status; 7935 } 7936 7937 7938 static NV_STATUS 7939 _shadowMemdescCreate(gpuRetainedChannel *retainedChannel, 7940 NV2080_CTRL_GR_CTX_BUFFER_INFO *pCtxBufferInfo, 7941 MEMORY_DESCRIPTOR **ppMemDesc) 7942 { 7943 NvU32 j; 7944 NvU64 pageSize = pCtxBufferInfo->pageSize; 7945 NvU32 numBufferPages = NV_ROUNDUP(pCtxBufferInfo->size, pageSize) / pageSize; 7946 MEMORY_DESCRIPTOR *pMemDesc = NULL; 7947 MEMORY_DESCRIPTOR *pBufferHandle = (MEMORY_DESCRIPTOR *) pCtxBufferInfo->bufferHandle; 7948 NV2080_CTRL_KGR_GET_CTX_BUFFER_PTES_PARAMS *pParams = NULL; 7949 NvU64 *pPages = NULL; 7950 NV_STATUS status = NV_OK; 7951 KernelChannel *pKernelChannel; 7952 RM_API *pRmApi; 7953 7954 if (_memDescFindAndRetain(retainedChannel, pBufferHandle, ppMemDesc)) 7955 goto done; 7956 7957 pPages = portMemAllocNonPaged(sizeof(*pPages) * numBufferPages); 7958 if (pPages == NULL) 7959 { 7960 status = NV_ERR_NO_MEMORY; 7961 goto done; 7962 } 7963 7964 status = 
nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel); 7965 if (status != NV_OK) 7966 { 7967 goto done; 7968 } 7969 7970 pParams = portMemAllocNonPaged(sizeof(*pParams)); 7971 if (pParams == NULL) 7972 { 7973 status = NV_ERR_NO_MEMORY; 7974 goto done; 7975 } 7976 7977 portMemSet(pParams, 0, sizeof(*pParams)); 7978 7979 pParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel); 7980 pParams->hChannel = RES_GET_HANDLE(pKernelChannel); 7981 pParams->bufferType = pCtxBufferInfo->bufferType; 7982 7983 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 7984 7985 for (j = 0; j < numBufferPages;) 7986 { 7987 pParams->firstPage = j; 7988 status = pRmApi->Control(pRmApi, 7989 retainedChannel->session->handle, 7990 retainedChannel->rmSubDevice->subDeviceHandle, 7991 NV2080_CTRL_CMD_KGR_GET_CTX_BUFFER_PTES, 7992 pParams, 7993 sizeof(*pParams)); 7994 if (status != NV_OK) 7995 { 7996 goto done; 7997 } 7998 7999 NV_ASSERT(j + pParams->numPages <= numBufferPages); 8000 8001 if (pCtxBufferInfo->bIsContigous) 8002 { 8003 pPages[0] = (NvU64)pParams->physAddrs[0]; 8004 break; 8005 } 8006 8007 portMemCopy(&pPages[j], pParams->numPages * sizeof(*pPages), 8008 pParams->physAddrs, pParams->numPages * sizeof(*pPages)); 8009 j += pParams->numPages; 8010 } 8011 8012 NV_ASSERT(pParams->bNoMorePages); 8013 8014 status = memdescCreate(&pMemDesc, 8015 retainedChannel->pGpu, 8016 pCtxBufferInfo->size, 8017 pCtxBufferInfo->alignment, 8018 pCtxBufferInfo->bIsContigous, 8019 pCtxBufferInfo->aperture, 8020 NV_MEMORY_CACHED, 8021 MEMDESC_FLAGS_NONE 8022 ); 8023 if (status != NV_OK) 8024 { 8025 goto done; 8026 } 8027 8028 8029 memdescSetPageSize(pMemDesc, 0, pCtxBufferInfo->pageSize); 8030 8031 if (pCtxBufferInfo->bIsContigous) 8032 { 8033 memdescDescribe(pMemDesc, pCtxBufferInfo->aperture, pPages[0], pCtxBufferInfo->size); 8034 } 8035 else 8036 { 8037 memdescFillPages(pMemDesc, 0, pPages, numBufferPages, pCtxBufferInfo->pageSize); 8038 } 8039 8040 (void) mapInsertValue(&retainedChannel->device->kern2PhysDescrMap, 8041 (NvU64) pMemDesc, 8042 &pBufferHandle); 8043 *ppMemDesc = pMemDesc; 8044 8045 done: 8046 portMemFree(pParams); 8047 portMemFree(pPages); 8048 return status; 8049 } 8050 8051 static NV_STATUS _nvGpuOpsRetainChannelResources(struct gpuDevice *device, 8052 NvHandle hClient, 8053 NvHandle hKernelChannel, 8054 gpuRetainedChannel *retainedChannel, 8055 gpuChannelInstanceInfo *channelInstanceInfo) 8056 { 8057 NV_STATUS status = NV_OK; 8058 NV2080_CTRL_GR_GET_CTX_BUFFER_INFO_PARAMS *pParams = NULL; 8059 NV2080_CTRL_FLCN_GET_CTX_BUFFER_INFO_PARAMS *pFlcnParams = NULL; 8060 gpuChannelResourceInfo *channelResourceInfo = channelInstanceInfo->resourceInfo; 8061 KernelChannel *pKernelChannel; 8062 RM_API *pRmApi; 8063 NvU32 channelEngineType = retainedChannel->channelEngineType; 8064 NvU32 i; 8065 NvU32 j; 8066 8067 NV_ASSERT(channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE || 8068 channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR || 8069 channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2); 8070 8071 // CE channels have 0 resources, so they skip this step 8072 if (channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_CE) 8073 { 8074 goto done; 8075 } 8076 8077 status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel); 8078 if (status != NV_OK) 8079 { 8080 goto done; 8081 } 8082 8083 if (channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2) 8084 { 8085 // get engine context memdesc, then get its PTEs. 
8086 MEMORY_DESCRIPTOR *pMemDesc = NULL; 8087 8088 // single buffer 8089 NV_ASSERT_OR_GOTO(NV_ARRAY_ELEMENTS(channelInstanceInfo->resourceInfo) >= 1, done); 8090 8091 pFlcnParams = portMemAllocNonPaged(sizeof(*pFlcnParams)); 8092 if (pFlcnParams == NULL) 8093 { 8094 status = NV_ERR_NO_MEMORY; 8095 goto done; 8096 } 8097 pFlcnParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel); 8098 pFlcnParams->hChannel = RES_GET_HANDLE(pKernelChannel); 8099 8100 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 8101 8102 // This RM CTRL refcounts all the resource memdescs. 8103 status = pRmApi->Control(pRmApi, 8104 retainedChannel->session->handle, 8105 retainedChannel->rmSubDevice->subDeviceHandle, 8106 NV2080_CTRL_CMD_FLCN_GET_CTX_BUFFER_INFO, 8107 pFlcnParams, 8108 sizeof(*pFlcnParams)); 8109 if (status != NV_OK) 8110 goto done; 8111 8112 gpuMemoryInfo *pGpuMemoryInfo = &channelResourceInfo[0].resourceInfo; 8113 8114 channelResourceInfo[0].resourceDescriptor = pFlcnParams->bufferHandle; 8115 channelResourceInfo[0].alignment = pFlcnParams->alignment; 8116 pGpuMemoryInfo->pageSize = pFlcnParams->pageSize; 8117 pGpuMemoryInfo->size = pFlcnParams->size; 8118 pGpuMemoryInfo->contig = pFlcnParams->bIsContigous; 8119 pGpuMemoryInfo->physAddr = pFlcnParams->physAddr; 8120 pGpuMemoryInfo->kind = pFlcnParams->kind; 8121 pGpuMemoryInfo->sysmem = pFlcnParams->aperture == ADDR_SYSMEM; 8122 pGpuMemoryInfo->deviceDescendant = pFlcnParams->bDeviceDescendant; 8123 8124 portMemCopy(pGpuMemoryInfo->uuid.uuid, sizeof(pGpuMemoryInfo->uuid.uuid), 8125 pFlcnParams->uuid, sizeof(pFlcnParams->uuid)); 8126 8127 status = _shadowMemdescCreateFlcn(retainedChannel, pFlcnParams, &pMemDesc); 8128 if (status != NV_OK) 8129 goto done; 8130 8131 channelResourceInfo[0].resourceDescriptor = (NvP64) pMemDesc; 8132 retainedChannel->resourceMemDesc[0] = pMemDesc; 8133 8134 channelInstanceInfo->resourceCount = 1; 8135 retainedChannel->resourceCount = 1; 8136 goto done; 8137 } 8138 8139 pParams = portMemAllocNonPaged(sizeof(*pParams)); 8140 if (pParams == NULL) 8141 { 8142 status = NV_ERR_NO_MEMORY; 8143 goto done; 8144 } 8145 8146 pParams->hUserClient = RES_GET_CLIENT_HANDLE(pKernelChannel); 8147 pParams->hChannel = RES_GET_HANDLE(pKernelChannel); 8148 8149 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 8150 8151 // This RM CTRL refcounts all the resource memdescs. 
8152 status = pRmApi->Control(pRmApi, 8153 retainedChannel->session->handle, 8154 retainedChannel->rmSubDevice->subDeviceHandle, 8155 NV2080_CTRL_CMD_GR_GET_CTX_BUFFER_INFO, 8156 pParams, 8157 sizeof(*pParams)); 8158 if (status != NV_OK) 8159 goto done; 8160 8161 NV_ASSERT(pParams->bufferCount <= NV_ARRAY_ELEMENTS(channelInstanceInfo->resourceInfo)); 8162 8163 for (i = 0; i < pParams->bufferCount; i++) 8164 { 8165 MEMORY_DESCRIPTOR *pMemDesc = NULL; 8166 NV2080_CTRL_GR_CTX_BUFFER_INFO *pCtxBufferInfo = &pParams->ctxBufferInfo[i]; 8167 gpuMemoryInfo *pGpuMemoryInfo = &channelResourceInfo[i].resourceInfo; 8168 8169 channelResourceInfo[i].resourceDescriptor = pCtxBufferInfo->bufferHandle; 8170 channelResourceInfo[i].resourceId = pCtxBufferInfo->bufferType; 8171 channelResourceInfo[i].alignment = pCtxBufferInfo->alignment; 8172 pGpuMemoryInfo->pageSize = pCtxBufferInfo->pageSize; 8173 pGpuMemoryInfo->size = pCtxBufferInfo->size; 8174 pGpuMemoryInfo->contig = pCtxBufferInfo->bIsContigous; 8175 pGpuMemoryInfo->physAddr = pCtxBufferInfo->physAddr; 8176 pGpuMemoryInfo->kind = pCtxBufferInfo->kind; 8177 pGpuMemoryInfo->sysmem = pCtxBufferInfo->aperture == ADDR_SYSMEM; 8178 pGpuMemoryInfo->deviceDescendant = pCtxBufferInfo->bDeviceDescendant; 8179 8180 portMemCopy(pGpuMemoryInfo->uuid.uuid, sizeof(pGpuMemoryInfo->uuid.uuid), 8181 pCtxBufferInfo->uuid, sizeof(pCtxBufferInfo->uuid)); 8182 8183 status = _shadowMemdescCreate(retainedChannel, pCtxBufferInfo, &pMemDesc); 8184 if (status != NV_OK) 8185 goto cleanup; 8186 8187 channelResourceInfo[i].resourceDescriptor = (NvP64) pMemDesc; 8188 retainedChannel->resourceMemDesc[i] = pMemDesc; 8189 } 8190 8191 channelInstanceInfo->resourceCount = pParams->bufferCount; 8192 retainedChannel->resourceCount = pParams->bufferCount; 8193 8194 cleanup: 8195 if (status != NV_OK) 8196 { 8197 for (j = 0; j < i; j++) 8198 { 8199 _shadowMemdescDestroy(retainedChannel, retainedChannel->resourceMemDesc[j]); 8200 } 8201 } 8202 8203 done: 8204 portMemFree(pParams); 8205 portMemFree(pFlcnParams); 8206 return status; 8207 } 8208 8209 static void _nvGpuOpsReleaseChannelResources(gpuRetainedChannel *retainedChannel) 8210 { 8211 NvU32 i; 8212 NvU32 descriptorCount = retainedChannel->resourceCount; 8213 8214 for (i = 0; i < descriptorCount; i++) 8215 { 8216 MEMORY_DESCRIPTOR *pMemDesc = retainedChannel->resourceMemDesc[i]; 8217 8218 _shadowMemdescDestroy(retainedChannel, pMemDesc); 8219 } 8220 } 8221 8222 NV_STATUS nvGpuOpsGetChannelResourcePtes(struct gpuAddressSpace *vaSpace, 8223 NvP64 resourceDescriptor, 8224 NvU64 offset, 8225 NvU64 size, 8226 gpuExternalMappingInfo *pGpuExternalMappingInfo) 8227 { 8228 NV_STATUS status = NV_OK; 8229 nvGpuOpsLockSet acquiredLocks; 8230 THREAD_STATE_NODE threadState; 8231 NvHandle hSubDevice; 8232 PMEMORY_DESCRIPTOR pMemDesc = NULL; 8233 OBJGPU *pMappingGpu = NULL; 8234 OBJVASPACE *pVAS = NULL; 8235 RsClient *pClient; 8236 8237 if (!vaSpace || !resourceDescriptor || !pGpuExternalMappingInfo) 8238 return NV_ERR_INVALID_ARGUMENT; 8239 8240 if (pGpuExternalMappingInfo->mappingPageSize != 0) 8241 { 8242 return NV_ERR_NOT_SUPPORTED; 8243 } 8244 8245 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 8246 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, 8247 vaSpace->device->session->handle, 8248 NULL, 8249 &acquiredLocks); 8250 if (status != NV_OK) 8251 { 8252 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8253 return status; 8254 } 8255 8256 pMemDesc = (MEMORY_DESCRIPTOR *) NvP64_VALUE(resourceDescriptor); 8257 8258 status = 
CliSetSubDeviceContext(vaSpace->device->session->handle, 8259 vaSpace->device->subhandle, 8260 &hSubDevice, 8261 &pMappingGpu); 8262 if (status != NV_OK) 8263 { 8264 _nvGpuOpsLocksRelease(&acquiredLocks); 8265 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8266 return status; 8267 } 8268 8269 if (pMemDesc->pGpu != pMappingGpu) 8270 { 8271 _nvGpuOpsLocksRelease(&acquiredLocks); 8272 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8273 return NV_ERR_NOT_SUPPORTED; 8274 } 8275 8276 // Do not support mapping on anything other than sysmem/vidmem! 8277 if ((memdescGetAddressSpace(pMemDesc) != ADDR_SYSMEM) && 8278 (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM)) 8279 { 8280 _nvGpuOpsLocksRelease(&acquiredLocks); 8281 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8282 return NV_ERR_NOT_SUPPORTED; 8283 } 8284 8285 status = serverGetClientUnderLock(&g_resServ, vaSpace->device->session->handle, &pClient); 8286 if (status != NV_OK) 8287 { 8288 _nvGpuOpsLocksRelease(&acquiredLocks); 8289 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8290 return status; 8291 } 8292 8293 status = vaspaceGetByHandleOrDeviceDefault(pClient, 8294 vaSpace->device->handle, 8295 vaSpace->handle, 8296 &pVAS); 8297 if (status != NV_OK) 8298 { 8299 _nvGpuOpsLocksRelease(&acquiredLocks); 8300 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8301 return status; 8302 } 8303 8304 status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pMemDesc, NULL, 8305 offset, size, NV_FALSE, NV_FALSE, 8306 0, pGpuExternalMappingInfo); 8307 8308 _nvGpuOpsLocksRelease(&acquiredLocks); 8309 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8310 return status; 8311 } 8312 8313 NV_STATUS nvGpuOpsBindChannelResources(gpuRetainedChannel *retainedChannel, 8314 gpuChannelResourceBindParams *channelResourceBindParams) 8315 { 8316 NV_STATUS status = NV_OK; 8317 nvGpuOpsLockSet acquiredLocks; 8318 THREAD_STATE_NODE threadState; 8319 NV2080_CTRL_GPU_PROMOTE_CTX_PARAMS *pParams; 8320 NvU32 i; 8321 KernelChannel *pKernelChannel = NULL; 8322 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 8323 8324 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 8325 8326 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, 8327 retainedChannel->session->handle, 8328 NULL, 8329 &acquiredLocks); 8330 if (status != NV_OK) 8331 { 8332 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8333 return status; 8334 } 8335 8336 status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel); 8337 if (status != NV_OK) 8338 { 8339 _nvGpuOpsLocksRelease(&acquiredLocks); 8340 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8341 return status; 8342 } 8343 8344 // Register channel resources.
CE channels have 0 resources, so they skip this step 8345 if (retainedChannel->resourceCount != 0) 8346 { 8347 RM_ENGINE_TYPE rmEngineType; 8348 8349 pParams = portMemAllocNonPaged(sizeof(*pParams)); 8350 if (pParams == NULL) 8351 { 8352 status = NV_ERR_NO_MEMORY; 8353 _nvGpuOpsLocksRelease(&acquiredLocks); 8354 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8355 return status; 8356 } 8357 8358 portMemSet(pParams, 0, sizeof(*pParams)); 8359 8360 pParams->hChanClient = RES_GET_CLIENT_HANDLE(pKernelChannel); 8361 pParams->hObject = RES_GET_HANDLE(pKernelChannel); 8362 pParams->entryCount = retainedChannel->resourceCount; 8363 8364 status = kfifoEngineInfoXlate_HAL(retainedChannel->pGpu, 8365 GPU_GET_KERNEL_FIFO(retainedChannel->pGpu), 8366 ENGINE_INFO_TYPE_RUNLIST, 8367 retainedChannel->runlistId, 8368 ENGINE_INFO_TYPE_RM_ENGINE_TYPE, 8369 (NvU32 *)&rmEngineType); 8370 8371 pParams->engineType = gpuGetNv2080EngineType(rmEngineType); 8372 8373 for (i = 0; i < retainedChannel->resourceCount; i++) 8374 { 8375 if (RM_ENGINE_TYPE_IS_GR(rmEngineType)) 8376 pParams->promoteEntry[i].bufferId = channelResourceBindParams[i].resourceId; 8377 8378 pParams->promoteEntry[i].gpuVirtAddr = channelResourceBindParams[i].resourceVa; 8379 } 8380 8381 status = pRmApi->Control(pRmApi, 8382 retainedChannel->session->handle, 8383 retainedChannel->rmSubDevice->subDeviceHandle, 8384 NV2080_CTRL_CMD_GPU_PROMOTE_CTX, 8385 pParams, 8386 sizeof(*pParams)); 8387 8388 portMemFree(pParams); 8389 } 8390 8391 if (NV_OK == status) 8392 { 8393 pKernelChannel->bIsContextBound = NV_TRUE; 8394 } 8395 8396 _nvGpuOpsLocksRelease(&acquiredLocks); 8397 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8398 return status; 8399 } 8400 8401 // nvGpuOpsRetainChannelResources only increments the ref-counts of the memdescs under the channel. 8402 // It does not prevent the user from freeing the associated hClient and hChannel handles, which means 8403 // the instance pointer may no longer be associated with a user object at this point. 8404 // If the instance pointer still has an associated channel, the channel is preempted and disabled.
8405 // Otherwise that must have already happened, so we just need to drop the ref counts on the resources. 8406 void nvGpuOpsStopChannel(gpuRetainedChannel *retainedChannel, 8407 NvBool bImmediate) 8408 { 8409 NV_STATUS status = NV_OK; 8410 nvGpuOpsLockSet acquiredLocks; 8411 THREAD_STATE_NODE threadState; 8412 KernelChannel *pKernelChannel = NULL; 8413 RsResourceRef *pResourceRef; 8414 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 8415 NVA06F_CTRL_STOP_CHANNEL_PARAMS stopChannelParams = {0}; 8416 8417 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 8418 8419 if (_nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, 8420 retainedChannel->session->handle, 8421 NULL, 8422 &acquiredLocks) != NV_OK) 8423 { 8424 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8425 return; 8426 } 8427 8428 status = nvGpuOpsGetChannelData(retainedChannel, &pKernelChannel); 8429 if (status != NV_OK) 8430 { 8431 _nvGpuOpsLocksRelease(&acquiredLocks); 8432 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8433 return; 8434 } 8435 8436 // Verify this channel handle is still valid 8437 status = serverutilGetResourceRef(RES_GET_CLIENT_HANDLE(pKernelChannel), RES_GET_HANDLE(pKernelChannel), &pResourceRef); 8438 if (status != NV_OK) 8439 { 8440 NV_ASSERT(0); 8441 _nvGpuOpsLocksRelease(&acquiredLocks); 8442 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8443 return; 8444 } 8445 8446 stopChannelParams.bImmediate = bImmediate; 8447 NV_ASSERT_OK( 8448 pRmApi->Control(pRmApi, 8449 RES_GET_CLIENT_HANDLE(pKernelChannel), 8450 RES_GET_HANDLE(pKernelChannel), 8451 NVA06F_CTRL_CMD_STOP_CHANNEL, 8452 &stopChannelParams, 8453 sizeof(stopChannelParams))); 8454 8455 pKernelChannel->bIsContextBound = NV_FALSE; 8456 8457 if (retainedChannel->channelEngineType == UVM_GPU_CHANNEL_ENGINE_TYPE_GR) 8458 { 8459 NV2080_CTRL_GPU_EVICT_CTX_PARAMS params; 8460 8461 portMemSet(&params, 0, sizeof(params)); 8462 params.engineType = NV2080_ENGINE_TYPE_GR(0); 8463 params.hClient = retainedChannel->session->handle; 8464 params.hChanClient = RES_GET_CLIENT_HANDLE(pKernelChannel); 8465 params.hObject = RES_GET_HANDLE(pKernelChannel); 8466 8467 NV_ASSERT_OK( 8468 pRmApi->Control(pRmApi, 8469 retainedChannel->session->handle, 8470 retainedChannel->rmSubDevice->subDeviceHandle, 8471 NV2080_CTRL_CMD_GPU_EVICT_CTX, 8472 &params, 8473 sizeof(params))); 8474 } 8475 8476 _nvGpuOpsLocksRelease(&acquiredLocks); 8477 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8478 } 8479 8480 // Make sure the UVM and PMA structs are in sync 8481 // The following location(s) need to be synced as well: 8482 // - uvm8_pmm_gpu.c:uvm8_test_pmm_query_pma_stats 8483 ct_assert(sizeof(UvmPmaStatistics) == sizeof(PMA_STATS)); 8484 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numPages2m) == NV_OFFSETOF(PMA_STATS, num2mbPages)); 8485 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages64k) == NV_OFFSETOF(PMA_STATS, numFreeFrames)); 8486 ct_assert(NV_OFFSETOF(UvmPmaStatistics, numFreePages2m) == NV_OFFSETOF(PMA_STATS, numFree2mbPages)); 8487 8488 /*! 8489 * Retrieve the PMA (Physical Memory Allocator) object initialized by RM 8490 * for the given device. 8491 * 8492 * @param[in] device device handle obtained in a prior call 8493 * to nvGpuOpsRmDeviceCreate. 8494 * 8495 * @param[out] pPmaObject Void pointer to RM PMA object of associated GPU 8496 * NULL if PMA not enabled & initialized. 8497 * @param[out] pPmaStats Pointer to UVM PMA statistics object of 8498 * associated GPU. Cannot be NULL.
8499 * 8500 * @returns NV_OK on success, 8501 * NV_ERR_INVALID_ARGUMENT if NULL pPmaObject, 8502 * NV_ERR_OBJECT_NOT_FOUND if PMA object not found 8503 * NV_ERR_NOT_SUPPORTED if PMA not supported 8504 */ 8505 NV_STATUS nvGpuOpsGetPmaObject(struct gpuDevice *device, 8506 void **pPmaObject, 8507 const UvmPmaStatistics **pPmaStats) 8508 { 8509 nvGpuOpsLockSet acquiredLocks; 8510 THREAD_STATE_NODE threadState; 8511 OBJGPU *pGpu = NULL; 8512 Heap *pHeap = NULL; 8513 MemoryManager *pMemoryManager; 8514 struct gpuSession *session = device->session; 8515 NV_STATUS status; 8516 8517 threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); 8518 status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_READ, session->handle, NULL, &acquiredLocks); 8519 if (status != NV_OK) 8520 { 8521 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8522 return status; 8523 } 8524 8525 status = CliSetGpuContext(session->handle, device->handle, &pGpu, NULL); 8526 if (status != NV_OK) 8527 { 8528 _nvGpuOpsLocksRelease(&acquiredLocks); 8529 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8530 return NV_ERR_OBJECT_NOT_FOUND; 8531 } 8532 8533 pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 8534 if (pMemoryManager == NULL) 8535 { 8536 _nvGpuOpsLocksRelease(&acquiredLocks); 8537 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8538 return NV_ERR_OBJECT_NOT_FOUND; 8539 } 8540 8541 if (IS_MIG_IN_USE(pGpu)) 8542 { 8543 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 8544 8545 status = kmigmgrGetMemoryPartitionHeapFromClient(pGpu, pKernelMIGManager, session->handle, &pHeap); 8546 if (status != NV_OK) 8547 { _nvGpuOpsLocksRelease(&acquiredLocks); threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); return status; } 8548 } 8549 else 8550 pHeap = GPU_GET_HEAP(pGpu); 8551 8552 if (pHeap == NULL) 8553 { 8554 _nvGpuOpsLocksRelease(&acquiredLocks); 8555 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8556 return NV_ERR_OBJECT_NOT_FOUND; 8557 } 8558 8559 if (!memmgrIsPmaInitialized(pMemoryManager)) 8560 { 8561 _nvGpuOpsLocksRelease(&acquiredLocks); 8562 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8563 return NV_ERR_OBJECT_NOT_FOUND; 8564 } 8565 8566 *pPmaObject = (void *)&pHeap->pmaObject; 8567 *pPmaStats = (const UvmPmaStatistics *)&pHeap->pmaObject.pmaStats; 8568 8569 _nvGpuOpsLocksRelease(&acquiredLocks); 8570 threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); 8571 return NV_OK; 8572 } 8573 8574 NV_STATUS nvGpuOpsP2pObjectCreate(struct gpuDevice *device1, 8575 struct gpuDevice *device2, 8576 NvHandle *hP2pObject) 8577 { 8578 NV_STATUS status; 8579 NV503B_ALLOC_PARAMETERS p2pAllocParams = {0}; 8580 NvHandle hTemp = 0; 8581 struct systemP2PCaps p2pCaps; 8582 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 8583 struct gpuSession *session; 8584 8585 if (!device1 || !device2 || !hP2pObject) 8586 return NV_ERR_INVALID_ARGUMENT; 8587 8588 if (device1->session != device2->session) 8589 return NV_ERR_INVALID_ARGUMENT; 8590 8591 status = getSystemP2PCaps(device1, device2, &p2pCaps); 8592 if (status != NV_OK) 8593 return status; 8594 8595 if (!p2pCaps.accessSupported) 8596 return NV_ERR_NOT_SUPPORTED; 8597 8598 p2pAllocParams.hSubDevice = device1->subhandle; 8599 p2pAllocParams.hPeerSubDevice = device2->subhandle; 8600 8601 session = device1->session; 8602 hTemp = NV01_NULL_OBJECT; 8603 status = pRmApi->Alloc(pRmApi, session->handle, session->handle, &hTemp, NV50_P2P, &p2pAllocParams); 8604 if (status == NV_OK) 8605 *hP2pObject = hTemp; 8606 8607 return status; 8608 } 8609 8610 NV_STATUS nvGpuOpsP2pObjectDestroy(struct gpuSession *session, 8611 NvHandle hP2pObject)
8612 { 8613 NV_STATUS status = NV_OK; 8614 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 8615 NV_ASSERT(session); 8616 8617 status = pRmApi->Free(pRmApi, session->handle, hP2pObject); 8618 NV_ASSERT(status == NV_OK); 8619 return status; 8620 } 8621 8622 NV_STATUS nvGpuOpsReportNonReplayableFault(struct gpuDevice *device, 8623 const void *pFaultPacket) 8624 { 8625 NV_STATUS status = NV_OK; 8626 NV2080_CTRL_GPU_REPORT_NON_REPLAYABLE_FAULT_PARAMS params; 8627 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 8628 8629 if (device == NULL || pFaultPacket == NULL) 8630 return NV_ERR_INVALID_ARGUMENT; 8631 8632 portMemSet(&params, 0, sizeof(params)); 8633 8634 portMemCopy(&params.faultPacket.data, 8635 NV2080_CTRL_GPU_FAULT_PACKET_SIZE, 8636 pFaultPacket, 8637 NV2080_CTRL_GPU_FAULT_PACKET_SIZE); 8638 8639 status = pRmApi->Control(pRmApi, 8640 device->session->handle, 8641 device->subhandle, 8642 NV2080_CTRL_CMD_GPU_REPORT_NON_REPLAYABLE_FAULT, 8643 &params, 8644 sizeof(params)); 8645 if (status != NV_OK) 8646 { 8647 NV_PRINTF(LEVEL_ERROR, 8648 "%s: NV2080_CTRL_CMD_GPU_REPORT_NON_REPLAYABLE_FAULT returned error %s!\n", 8649 __FUNCTION__, nvstatusToString(status)); 8650 } 8651 8652 return status; 8653 } 8654 8655 NV_STATUS nvGpuOpsPagingChannelAllocate(struct gpuDevice *device, 8656 const gpuPagingChannelAllocParams *params, 8657 gpuPagingChannelHandle *channelHandle, 8658 gpuPagingChannelInfo *channelInfo) 8659 { 8660 NV_STATUS status, status2; 8661 UvmGpuPagingChannel *channel = NULL; 8662 Device *pDevice; 8663 RsClient *pClient; 8664 NvHandle hClient; 8665 NvLength errorNotifierSize; 8666 NvU64 paOffset; 8667 gpuAllocInfo allocInfo = {0}; 8668 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 8669 NvU32 pid = osGetCurrentProcess(); 8670 8671 if (!device || !params || !channelHandle || !channelInfo) 8672 return NV_ERR_INVALID_ARGUMENT; 8673 8674 if (!NV2080_ENGINE_TYPE_IS_COPY(NV2080_ENGINE_TYPE_COPY(params->engineIndex))) 8675 return NV_ERR_INVALID_ARGUMENT; 8676 8677 hClient = device->session->handle; 8678 NV_ASSERT(hClient); 8679 8680 channel = portMemAllocNonPaged(sizeof(*channel)); 8681 if (!channel) 8682 return NV_ERR_NO_MEMORY; 8683 8684 portMemSet(channel, 0, sizeof(*channel)); 8685 channel->device = device; 8686 8687 errorNotifierSize = sizeof(NvNotification) * 8688 NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1; 8689 status = nvGpuOpsAllocPhysical(device, 8690 NV_TRUE, 8691 errorNotifierSize, 8692 &paOffset, 8693 &allocInfo); 8694 if (status != NV_OK) 8695 goto cleanup_free_channel; 8696 8697 channel->errorNotifierHandle = allocInfo.hPhysHandle; 8698 NV_ASSERT(channel->errorNotifierHandle); 8699 8700 status = pRmApi->MapToCpu(pRmApi, 8701 hClient, 8702 device->subhandle, 8703 channel->errorNotifierHandle, 8704 0, 8705 errorNotifierSize, 8706 (void **)&channel->errorNotifier, 8707 0); 8708 if (status != NV_OK) 8709 goto cleanup_free_error_notifier; 8710 8711 NV_ASSERT(channel->errorNotifier); 8712 8713 // Ideally, we need to acquire three locks (in that order): 8714 // a. RM API lock 8715 // b. device->handle GPU lock 8716 // c. RPC lock 8717 // (b) GPU lock is optional because RM will acquire all needed locks automatically. 8718 // (c) RPC lock is optional because currently there is no scenario in which channel allocation/destruction 8719 // can be run concurrently with any other SR-IOV heavy API that results in an RPC (Map/Unmap/PushStream). 8720 // 8721 // However, if we acquire GPU locks, NV_RM_RPC_UVM_PAGING_CHANNEL_ALLOCATE would fail.
8722 // It's because PAGING_CHANNEL_ALLOCATE allocates AMPERE_CHANNEL_GPFIFO_A, which allocates 8723 // KernelChannelGroupApi. KernelChannelGroupApi would fail because 8724 // 'TSG alloc should be called without acquiring GPU lock'. 8725 // KernelChannelGroupApi acquires GPU locks manually after allocating TSG. 8726 // 8727 // The TSG allocation requirement just described not only precludes the acquisition 8728 // of any GPU lock in this function, but also the acquisition of the RPC lock, 8729 // because it would result in a lock order violation: the RPC lock is acquired 8730 // before the GPU lock. As a result, nvGpuOpsPagingChannelAllocate only acquires 8731 // the RM API lock, and so does nvGpuOpsPagingChannelDestroy. 8732 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS); 8733 if (status != NV_OK) 8734 goto cleanup_unmap_error_notifier; 8735 8736 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 8737 if (status != NV_OK) 8738 goto cleanup_under_rmapi_lock; 8739 8740 status = deviceGetByHandle(pClient, device->handle, &pDevice); 8741 if (status != NV_OK) 8742 goto cleanup_under_rmapi_lock; 8743 8744 channel->pDevice = pDevice; 8745 8746 GPU_RES_SET_THREAD_BC_STATE(pDevice); 8747 8748 if (status != NV_OK) 8749 goto cleanup_under_rmapi_lock; 8750 8751 rmapiLockRelease(); 8752 8753 *channelHandle = channel; 8754 8755 channelInfo->shadowErrorNotifier = channel->errorNotifier; 8756 8757 return NV_OK; 8758 8759 cleanup_under_rmapi_lock: 8760 rmapiLockRelease(); 8761 8762 cleanup_unmap_error_notifier: 8763 status2 = pRmApi->UnmapFromCpu(pRmApi, 8764 hClient, 8765 device->subhandle, 8766 channel->errorNotifierHandle, 8767 (void *)channel->errorNotifier, 8768 0, 8769 pid); 8770 NV_ASSERT(status2 == NV_OK); 8771 8772 cleanup_free_error_notifier: 8773 pRmApi->Free(pRmApi, hClient, channel->errorNotifierHandle); 8774 8775 cleanup_free_channel: 8776 portMemFree(channel); 8777 8778 return status; 8779 } 8780 8781 void nvGpuOpsPagingChannelDestroy(UvmGpuPagingChannel *channel) 8782 { 8783 NV_STATUS status; 8784 struct gpuDevice *device; 8785 Device *pDevice; 8786 RsClient *pClient; 8787 NvHandle hClient; 8788 RM_API *pRmApi = rmapiGetInterface(RMAPI_EXTERNAL_KERNEL); 8789 NvU32 pid = osGetCurrentProcess(); 8790 8791 NV_ASSERT(channel); 8792 8793 device = channel->device; 8794 NV_ASSERT(device); 8795 8796 hClient = device->session->handle; 8797 NV_ASSERT(hClient); 8798 8799 // We acquire only the RM API lock here. See comment in nvGpuOpsPagingChannelAllocate.
8800 status = rmapiLockAcquire(RMAPI_LOCK_FLAGS_READ, RM_LOCK_MODULES_GPU_OPS); 8801 NV_ASSERT(status == NV_OK); 8802 if (status != NV_OK) 8803 { 8804 NV_PRINTF(LEVEL_ERROR, 8805 "%s: rmapiLockAcquire returned error %s!\n", 8806 __FUNCTION__, nvstatusToString(status)); 8807 goto cleanup; 8808 } 8809 8810 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 8811 NV_ASSERT(status == NV_OK); 8812 if (status != NV_OK) 8813 { 8814 NV_PRINTF(LEVEL_ERROR, 8815 "%s: serverGetClientUnderLock returned error %s!\n", 8816 __FUNCTION__, nvstatusToString(status)); 8817 goto cleanup_under_rmapi_lock; 8818 } 8819 8820 status = deviceGetByHandle(pClient, device->handle, &pDevice); 8821 NV_ASSERT(status == NV_OK); 8822 if (status != NV_OK) 8823 { 8824 NV_PRINTF(LEVEL_ERROR, 8825 "%s: deviceGetByHandle returned error %s!\n", 8826 __FUNCTION__, nvstatusToString(status)); 8827 goto cleanup_under_rmapi_lock; 8828 } 8829 8830 GPU_RES_SET_THREAD_BC_STATE(pDevice); 8831 8832 cleanup_under_rmapi_lock: 8833 rmapiLockRelease(); 8834 8835 cleanup: 8836 status = pRmApi->UnmapFromCpu(pRmApi, 8837 hClient, 8838 device->subhandle, 8839 channel->errorNotifierHandle, 8840 (void *)channel->errorNotifier, 8841 0, 8842 pid); 8843 NV_ASSERT(status == NV_OK); 8844 if (status != NV_OK) 8845 { 8846 NV_PRINTF(LEVEL_ERROR, 8847 "%s: UnmapFromCpu returned error %s!\n", 8848 __FUNCTION__, nvstatusToString(status)); 8849 } 8850 8851 pRmApi->Free(pRmApi, hClient, channel->errorNotifierHandle); 8852 portMemFree(channel); 8853 } 8854 8855 NV_STATUS nvGpuOpsPagingChannelsMap(struct gpuAddressSpace *srcVaSpace, 8856 NvU64 srcAddress, 8857 struct gpuDevice *device, 8858 NvU64 *dstAddress) 8859 { 8860 NV_STATUS status; 8861 Device *pDevice; 8862 RsClient *pClient; 8863 NvHandle hAllocation; 8864 NvHandle hClient; 8865 nvGpuOpsLockSet acquiredLocks; 8866 8867 if (!srcVaSpace || !device || !dstAddress) 8868 return NV_ERR_INVALID_ARGUMENT; 8869 8870 hClient = device->session->handle; 8871 NV_ASSERT(hClient); 8872 8873 8874 status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &hAllocation); 8875 if (status != NV_OK) 8876 return status; 8877 8878 status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_NONE, hClient, NULL, 2, 8879 device->deviceInstance, srcVaSpace->device->deviceInstance, &acquiredLocks); 8880 if (status != NV_OK) 8881 return status; 8882 8883 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 8884 if (status != NV_OK) 8885 goto exit_under_locks; 8886 8887 status = deviceGetByHandle(pClient, device->handle, &pDevice); 8888 if (status != NV_OK) 8889 goto exit_under_locks; 8890 8891 GPU_RES_SET_THREAD_BC_STATE(pDevice); 8892 8893 portSyncMutexAcquire(device->pPagingChannelRpcMutex); 8894 8895 portSyncMutexRelease(device->pPagingChannelRpcMutex); 8896 8897 exit_under_locks: 8898 _nvGpuOpsLocksRelease(&acquiredLocks); 8899 8900 return status; 8901 } 8902 8903 void nvGpuOpsPagingChannelsUnmap(struct gpuAddressSpace *srcVaSpace, 8904 NvU64 srcAddress, 8905 struct gpuDevice *device) 8906 { 8907 NV_STATUS status; 8908 Device *pDevice; 8909 RsClient *pClient; 8910 NvHandle hAllocation; 8911 NvHandle hClient; 8912 nvGpuOpsLockSet acquiredLocks; 8913 8914 NV_ASSERT(srcVaSpace && device); 8915 if (!srcVaSpace || !device) 8916 return; 8917 8918 hClient = device->session->handle; 8919 NV_ASSERT(hClient); 8920 8921 status = getHandleForVirtualAddr(srcVaSpace, srcAddress, NV_TRUE, &hAllocation); 8922 NV_ASSERT(status == NV_OK); 8923 if (status != NV_OK) 8924 { 8925 NV_PRINTF(LEVEL_ERROR, 8926 "%s: 
getHandleForVirtualAddr returned error %s!\n", 8927 __FUNCTION__, nvstatusToString(status)); 8928 return; 8929 } 8930 8931 status = _nvGpuOpsLocksAcquire(RMAPI_LOCK_FLAGS_NONE, hClient, NULL, 2, 8932 device->deviceInstance, srcVaSpace->device->deviceInstance, &acquiredLocks); 8933 if (status != NV_OK) 8934 { 8935 NV_PRINTF(LEVEL_ERROR, 8936 "%s: _nvGpuOpsLocksAcquire returned error %s!\n", 8937 __FUNCTION__, nvstatusToString(status)); 8938 return; 8939 } 8940 8941 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 8942 NV_ASSERT(status == NV_OK); 8943 if (status != NV_OK) 8944 { 8945 NV_PRINTF(LEVEL_ERROR, 8946 "%s: serverGetClientUnderLock returned error %s!\n", 8947 __FUNCTION__, nvstatusToString(status)); 8948 goto exit_under_locks; 8949 } 8950 8951 status = deviceGetByHandle(pClient, device->handle, &pDevice); 8952 NV_ASSERT(status == NV_OK); 8953 if (status != NV_OK) 8954 { 8955 NV_PRINTF(LEVEL_ERROR, 8956 "%s: deviceGetByHandle returned error %s!\n", 8957 __FUNCTION__, nvstatusToString(status)); 8958 goto exit_under_locks; 8959 } 8960 8961 GPU_RES_SET_THREAD_BC_STATE(pDevice); 8962 8963 portSyncMutexAcquire(device->pPagingChannelRpcMutex); 8964 8965 portSyncMutexRelease(device->pPagingChannelRpcMutex); 8966 8967 exit_under_locks: 8968 _nvGpuOpsLocksRelease(&acquiredLocks); 8969 } 8970 8971 NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel, 8972 char *methodStream, 8973 NvU32 methodStreamSize) 8974 { 8975 NV_STATUS status = NV_OK; 8976 struct gpuDevice *device = NULL; 8977 8978 if (!channel || !methodStream) 8979 return NV_ERR_INVALID_ARGUMENT; 8980 if (methodStreamSize == 0) 8981 return NV_OK; 8982 8983 device = channel->device; 8984 NV_ASSERT(device); 8985 8986 GPU_RES_SET_THREAD_BC_STATE(channel->pDevice); 8987 8988 portSyncMutexAcquire(device->pPagingChannelRpcMutex); 8989 8990 portSyncMutexRelease(device->pPagingChannelRpcMutex); 8991 8992 return status; 8993 } 8994 8995 static NV_STATUS nvGpuOpsGetMemoryByHandle(NvHandle hClient, NvHandle hMemory, Memory **ppMemory) 8996 { 8997 RsClient *pRsClient = NULL; 8998 8999 NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, 9000 hClient, &pRsClient)); 9001 9002 return memGetByHandle(pRsClient, 9003 hMemory, 9004 ppMemory); 9005 } 9006 9007
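// ---------------------------------------------------------------------------
// Illustrative sketch (compiled out): how a client such as UVM might drain
// the non-replayable shadow fault buffer with
// nvGpuOpsHasPendingNonReplayableFaults() and
// nvGpuOpsGetNonReplayableFaults() above. The helper name and the fixed
// packet count are hypothetical; a real caller must size faultBuffer to hold
// every packet the shadow buffer can queue, because
// nvGpuOpsGetNonReplayableFaults() copies out all pending faults in one call.
// ---------------------------------------------------------------------------
#if 0
#define EXAMPLE_MAX_FAULT_PACKETS 32 // hypothetical upper bound chosen by the caller

static NV_STATUS exampleDrainNonReplayableFaults(gpuFaultInfo *pFaultInfo)
{
    // Caller-owned storage: one NVC369_BUF_SIZE packet per fault.
    NvU8 faultPackets[EXAMPLE_MAX_FAULT_PACKETS * NVC369_BUF_SIZE];
    NvBool bPending = NV_FALSE;
    NvU32 numFaults = 0;
    NV_STATUS status;

    // Cheap emptiness probe before paying for the copy-out.
    status = nvGpuOpsHasPendingNonReplayableFaults(pFaultInfo, &bPending);
    if (status != NV_OK || !bPending)
        return status;

    // Pops every queued packet into faultPackets and reports the count.
    status = nvGpuOpsGetNonReplayableFaults(pFaultInfo, faultPackets, &numFaults);
    if (status != NV_OK)
        return status;

    // A real consumer would now decode numFaults packets of NVC369_BUF_SIZE
    // bytes each and service or cancel the faulting channels.
    return NV_OK;
}
#endif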
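// ---------------------------------------------------------------------------
// Illustrative sketch (compiled out): pairing nvGpuOpsP2pObjectCreate() with
// nvGpuOpsP2pObjectDestroy() for two devices that share a session. The helper
// name is hypothetical and error handling is trimmed to the essentials.
// ---------------------------------------------------------------------------
#if 0
static NV_STATUS exampleP2pLifetime(struct gpuDevice *deviceA,
                                    struct gpuDevice *deviceB)
{
    NvHandle hP2p = 0;
    NV_STATUS status;

    // Fails with NV_ERR_NOT_SUPPORTED unless getSystemP2PCaps() reports
    // accessSupported for this pair, and with NV_ERR_INVALID_ARGUMENT if the
    // devices belong to different sessions.
    status = nvGpuOpsP2pObjectCreate(deviceA, deviceB, &hP2p);
    if (status != NV_OK)
        return status;

    // ... map peer memory and do work with the P2P object here ...

    // Teardown goes through the owning session.
    return nvGpuOpsP2pObjectDestroy(deviceA->session, hP2p);
}
#endif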